Compare commits

...

17 Commits

Author SHA1 Message Date
a3642f127c Verify before printing 2024-09-12 20:41:23 +03:00
7f3a3ac1f8 modify how block is codegenerated 2024-08-25 23:28:01 +03:00
e21f47e34b Implement some kind of if/else 2024-08-25 23:17:52 +03:00
8defa39b31 Restructure a bit 2024-08-25 22:29:45 +03:00
6a4c30e49e Add scope.inner 2024-08-22 00:34:58 +03:00
8b8cd2c464 Add comments to easy.reid 2024-08-21 23:31:31 +03:00
6448b0c438 Add necessary codegen for easy.reid 2024-08-21 23:31:09 +03:00
e00d9afc7b Add Scope 2024-08-21 23:07:04 +03:00
7208fe962e Read return and function name from signature 2024-08-21 22:14:32 +03:00
525dab2147 dd IRValue, IRBlock 2024-08-21 22:00:53 +03:00
9b5d8acdb4 Add IRFunction, mess with lifetimes 2024-08-21 21:16:23 +03:00
22ee941ad6 Move module creation to IRModule 2024-08-21 20:12:47 +03:00
9a74158ae7 Minor tweaks 2024-08-21 20:08:02 +03:00
dd3e0618ae Add the very most basic codegen 2024-07-17 20:43:28 +03:00
8176dc98a3 Add TokenStream 2024-07-17 20:08:24 +03:00
c5c9cd3458 Add parse_if and parse_map 2024-07-17 19:28:06 +03:00
c6e6e1dbee Fix operator precedence parsing 2024-07-17 18:39:31 +03:00
12 changed files with 663 additions and 506 deletions

2
.gitignore vendored
View File

@ -1,3 +1,5 @@
src/old_llvm
/target
/.vscode
.env

View File

@ -1,9 +1,9 @@
use reid::compile;
pub static EASIEST: &str = include_str!("./reid/easiest.reid");
pub static ARITHMETIC: &str = include_str!("./reid/arithmetic.reid");
fn main() {
let text = match compile(EASIEST) {
let text = match compile(ARITHMETIC) {
Ok(t) => t,
Err(e) => panic!("{}", e),
};

View File

@ -0,0 +1,14 @@
// Arithmetic, function calls and imports!
fn main() {
let test = 9;
let simpleAdd = 2 + 2;
let simpleMult = 7 * 2; // 14
let arithmetic = 3 + 2 * 5 + 1 * 2; // 15
if simpleAdd < test {
return 3;
}
return arithmetic + simpleMult * arithmetic;
}

View File

@ -1,12 +0,0 @@
// Arithmetic, function calls and imports!
import std::print;
fn main() {
let test = 5;
let simpleAdd = 2 + 2;
let arithmetic = 3 + 2 * 5 + 1 * 2;
let multiplier = 5 * 2;
return arithmetic + multiplier * arithmetic;
}

View File

@ -1,17 +1,9 @@
// Hello, comment here!
import std::print;
// Main
fn main() {
let hello = 32 + {
2 + 3
};
let beep = hello + fibonacci();
return beep;
return fibonacci(10);
}
// Fibonacci
fn fibonacci(value: i32) -> i32 {
if value < 3 {
return 1;

View File

@ -1,6 +1,6 @@
use crate::{
lexer::Token,
token_stream::{Error, TokenStream},
token_stream::{Error, TokenRange, TokenStream},
};
pub trait Parse
@ -11,20 +11,25 @@ where
}
#[derive(Debug, Clone)]
pub enum Type {
pub struct Type(pub TypeKind, pub TokenRange);
#[derive(Debug, Clone)]
pub enum TypeKind {
I32,
}
impl Parse for Type {
fn parse(mut stream: TokenStream) -> Result<Self, Error> {
if let Some(Token::Identifier(ident)) = stream.next() {
let kind = if let Some(Token::Identifier(ident)) = stream.next() {
Ok(match &*ident {
"i32" => Type::I32,
"i32" => TypeKind::I32,
_ => panic!("asd"),
})
} else {
Err(stream.expected_err("type identifier")?)
}
}?;
Ok(Type(kind, stream.get_range().unwrap()))
}
}
@ -34,7 +39,10 @@ pub enum Literal {
}
#[derive(Debug, Clone)]
pub enum Expression {
pub struct Expression(pub ExpressionKind, pub TokenRange);
#[derive(Debug, Clone)]
pub enum ExpressionKind {
VariableName(String),
Literal(Literal),
Binop(BinaryOperator, Box<Expression>, Box<Expression>),
@ -51,16 +59,32 @@ impl Parse for Expression {
}
fn parse_primary_expression(stream: &mut TokenStream) -> Result<Expression, Error> {
use ExpressionKind as Kind;
if let Ok(exp) = stream.parse() {
Ok(Expression::FunctionCall(Box::new(exp)))
Ok(Expression(
Kind::FunctionCall(Box::new(exp)),
stream.get_range().unwrap(),
))
} else if let Ok(block) = stream.parse() {
Ok(Expression::BlockExpr(Box::new(block)))
Ok(Expression(
Kind::BlockExpr(Box::new(block)),
stream.get_range().unwrap(),
))
} else if let Ok(ifexpr) = stream.parse() {
Ok(Expression::IfExpr(Box::new(ifexpr)))
Ok(Expression(
Kind::IfExpr(Box::new(ifexpr)),
stream.get_range().unwrap(),
))
} else if let Some(token) = stream.next() {
Ok(match &token {
Token::Identifier(v) => Expression::VariableName(v.clone()),
Token::DecimalValue(v) => Expression::Literal(Literal::I32(v.parse().unwrap())),
Token::Identifier(v) => {
Expression(Kind::VariableName(v.clone()), stream.get_range().unwrap())
}
Token::DecimalValue(v) => Expression(
Kind::Literal(Literal::I32(v.parse().unwrap())),
stream.get_range().unwrap(),
),
Token::ParenOpen => {
let exp = stream.parse()?;
stream.expect(Token::ParenClose)?;
@ -81,34 +105,38 @@ fn parse_primary_expression(stream: &mut TokenStream) -> Result<Expression, Erro
fn parse_binop_rhs(
stream: &mut TokenStream,
mut lhs: Expression,
mut operator: Option<BinaryOperator>,
mut prev_operator: Option<BinaryOperator>,
) -> Result<Expression, Error> {
let expr_prec = if let Some(op) = operator {
op.get_precedence() + 1
// Expression precedence = LHS precedence so far.
let expr_precedence = if let Some(op) = prev_operator.take() {
op.get_precedence()
} else {
0
};
while let Some(op) = operator.take().as_ref().or(stream.parse().as_ref().ok()) {
while let Ok(op) =
// If next operator precedence is lower than expression precedence, we
// need to climb back up the recursion.
stream.parse_if::<BinaryOperator, _>(|b| b.get_precedence() >= expr_precedence)
{
let curr_token_prec = op.get_precedence();
let mut rhs = parse_primary_expression(stream)?;
if curr_token_prec < expr_prec {
break; // Just return lhs
} else {
let mut rhs = parse_primary_expression(stream)?;
if let Ok(next_op) = stream.parse::<BinaryOperator>() {
let next_prec = next_op.get_precedence();
if curr_token_prec < next_prec {
// Operator on the right of rhs has more precedence, turn
// rhs into lhs for new binop
rhs = parse_binop_rhs(stream, rhs, Some(next_op))?;
} else {
let _ = operator.insert(next_op);
}
if let Ok(next_op) = stream.parse_peek::<BinaryOperator>() {
let next_prec = next_op.get_precedence();
if curr_token_prec < next_prec {
// Operator on the right of rhs has more precedence, turn
// rhs into lhs for new binop
rhs = parse_binop_rhs(stream, rhs, Some(op))?;
} else {
let _ = prev_operator.insert(next_op);
}
lhs = Expression::Binop(*op, Box::new(lhs), Box::new(rhs));
}
lhs = Expression(
ExpressionKind::Binop(op, Box::new(lhs), Box::new(rhs)),
stream.get_range().unwrap(),
);
}
Ok(lhs)
@ -155,7 +183,7 @@ impl BinaryOperator {
}
#[derive(Debug, Clone)]
pub struct FunctionCallExpression(pub String, pub Vec<Expression>);
pub struct FunctionCallExpression(pub String, pub Vec<Expression>, pub TokenRange);
impl Parse for FunctionCallExpression {
fn parse(mut stream: TokenStream) -> Result<Self, Error> {
@ -174,7 +202,11 @@ impl Parse for FunctionCallExpression {
stream.expect(Token::ParenClose)?;
Ok(FunctionCallExpression(name, args))
Ok(FunctionCallExpression(
name,
args,
stream.get_range().unwrap(),
))
} else {
Err(stream.expected_err("identifier")?)
}
@ -182,17 +214,21 @@ impl Parse for FunctionCallExpression {
}
#[derive(Debug, Clone)]
pub struct IfExpression(Expression, pub Block);
pub struct IfExpression(pub Expression, pub Block, pub TokenRange);
impl Parse for IfExpression {
fn parse(mut stream: TokenStream) -> Result<Self, Error> {
stream.expect(Token::If)?;
Ok(IfExpression(stream.parse()?, stream.parse()?))
Ok(IfExpression(
stream.parse()?,
stream.parse()?,
stream.get_range().unwrap(),
))
}
}
#[derive(Debug, Clone)]
pub struct LetStatement(pub String, pub Expression);
pub struct LetStatement(pub String, pub Expression, pub TokenRange);
impl Parse for LetStatement {
fn parse(mut stream: TokenStream) -> Result<LetStatement, Error> {
@ -203,7 +239,11 @@ impl Parse for LetStatement {
let expression = stream.parse()?;
stream.expect(Token::Semi)?;
Ok(LetStatement(variable, expression))
Ok(LetStatement(
variable,
expression,
stream.get_range().unwrap(),
))
} else {
Err(stream.expected_err("identifier")?)
}
@ -211,7 +251,7 @@ impl Parse for LetStatement {
}
#[derive(Debug, Clone)]
pub struct ImportStatement(Vec<String>);
pub struct ImportStatement(Vec<String>, pub TokenRange);
impl Parse for ImportStatement {
fn parse(mut stream: TokenStream) -> Result<Self, Error> {
@ -234,17 +274,21 @@ impl Parse for ImportStatement {
stream.expect(Token::Semi)?;
Ok(ImportStatement(import_list))
Ok(ImportStatement(import_list, stream.get_range().unwrap()))
}
}
#[derive(Debug)]
pub struct FunctionDefinition(pub FunctionSignature, pub Block);
pub struct FunctionDefinition(pub FunctionSignature, pub Block, pub TokenRange);
impl Parse for FunctionDefinition {
fn parse(mut stream: TokenStream) -> Result<Self, Error> {
stream.expect(Token::FnKeyword)?;
Ok(FunctionDefinition(stream.parse()?, stream.parse()?))
Ok(FunctionDefinition(
stream.parse()?,
stream.parse()?,
stream.get_range().unwrap(),
))
}
}
@ -253,6 +297,7 @@ pub struct FunctionSignature {
pub name: String,
pub args: Vec<(String, Type)>,
pub return_type: Option<Type>,
pub range: TokenRange,
}
impl Parse for FunctionSignature {
@ -278,6 +323,7 @@ impl Parse for FunctionSignature {
name,
args,
return_type,
range: stream.get_range().unwrap(),
})
} else {
Err(stream.expected_err("identifier")?)?
@ -295,6 +341,7 @@ pub enum ReturnType {
pub struct Block(
pub Vec<BlockLevelStatement>,
pub Option<(ReturnType, Expression)>,
pub TokenRange,
);
impl Parse for Block {
@ -307,7 +354,7 @@ impl Parse for Block {
if let Some((r_type, e)) = return_stmt.take() {
// Special list of expressions that are simply not warned about,
// if semicolon is missing.
if !matches!(&e, &Expression::IfExpr(_)) {
if !matches!(e, Expression(ExpressionKind::IfExpr(_), _)) {
dbg!(r_type, &e);
println!("Oh no, does this statement lack ;");
}
@ -315,7 +362,7 @@ impl Parse for Block {
statements.push(BlockLevelStatement::Expression(e));
}
let statement = stream.parse()?;
if let BlockLevelStatement::Return((r_type, e)) = &statement {
if let BlockLevelStatement::Return(r_type, e) = &statement {
match r_type {
ReturnType::Hard => {
return_stmt = Some((*r_type, e.clone()));
@ -331,7 +378,7 @@ impl Parse for Block {
statements.push(statement);
}
stream.expect(Token::BraceClose)?;
Ok(Block(statements, return_stmt))
Ok(Block(statements, return_stmt, stream.get_range().unwrap()))
}
}
@ -340,7 +387,7 @@ pub enum BlockLevelStatement {
Let(LetStatement),
Import(ImportStatement),
Expression(Expression),
Return((ReturnType, Expression)),
Return(ReturnType, Expression),
}
impl Parse for BlockLevelStatement {
@ -353,14 +400,14 @@ impl Parse for BlockLevelStatement {
stream.next();
let exp = stream.parse()?;
stream.expect(Token::Semi)?;
Stmt::Return((ReturnType::Hard, exp))
Stmt::Return(ReturnType::Hard, exp)
}
_ => {
if let Ok(e) = stream.parse() {
if stream.expect(Token::Semi).is_ok() {
Stmt::Expression(e)
} else {
Stmt::Return((ReturnType::Soft, e))
Stmt::Return(ReturnType::Soft, e)
}
} else {
Err(stream.expected_err("expression")?)?

View File

@ -1,222 +0,0 @@
use std::collections::{hash_map, HashMap};
use crate::{
ast::{
BinaryOperator, Block, BlockLevelStatement, Expression, FunctionCallExpression,
FunctionDefinition, FunctionSignature, ReturnType, TopLevelStatement,
},
llvm_ir::{self, IRBlock, IRFunction, IRModule, IRValue, IRValueType},
};
#[derive(Clone)]
pub struct ScopeData {
named_vars: HashMap<String, IRValue>,
defined_functions: HashMap<String, (FunctionSignature, Option<IRFunction>)>,
}
impl ScopeData {
pub fn inner<'a, 'b>(&self, block: &'b mut IRBlock<'a>) -> Scope<'a, 'b> {
Scope {
block,
data: self.clone(),
}
}
pub fn var(&self, name: &String) -> Option<&IRValue> {
self.named_vars.get(name)
}
pub fn set_var(&mut self, name: &str, val: IRValue) -> Result<(), Error> {
if let hash_map::Entry::Vacant(e) = self.named_vars.entry(name.to_owned()) {
e.insert(val);
Ok(())
} else {
Err(Error::VariableAlreadyDefined(name.to_owned()))
}
}
pub fn function(
&mut self,
name: &String,
) -> Option<&mut (FunctionSignature, Option<IRFunction>)> {
self.defined_functions.get_mut(name)
}
pub fn set_function_signature(
&mut self,
name: &str,
sig: FunctionSignature,
ir: IRFunction,
) -> Result<(), Error> {
if let hash_map::Entry::Vacant(e) = self.defined_functions.entry(name.to_owned()) {
e.insert((sig, Some(ir)));
Ok(())
} else {
Err(Error::VariableAlreadyDefined(name.to_owned()))
}
}
}
/// A block being emitted into, paired with the names visible while doing so.
pub struct Scope<'a, 'b> {
    pub block: &'b mut IRBlock<'a>,
    pub data: ScopeData,
}

impl<'a, 'b> Scope<'a, 'b> {
    /// Creates a nested scope that writes into the same block but works on
    /// its own copy of the name data, so bindings made inside do not leak
    /// back into `self`.
    pub fn inner<'c>(&'c mut self) -> Scope<'a, 'c> {
        let data = self.data.clone();
        Scope {
            block: &mut *self.block,
            data,
        }
    }
}
/// Generates an IR module named `testmod` from the parsed top-level
/// statements.
///
/// Works in two passes: every function is declared first, and only then are
/// the statements themselves lowered.
///
/// # Errors
/// Propagates the first error reported while registering a function or while
/// generating code for a statement.
pub fn codegen_from_statements(statements: Vec<TopLevelStatement>) -> Result<IRModule, Error> {
    let mut module = IRModule::new("testmod");
    let mut scope = ScopeData {
        named_vars: HashMap::new(),
        defined_functions: HashMap::new(),
    };

    // Pass 1: declare all functions.
    for statement in statements.iter() {
        match statement {
            TopLevelStatement::Import(_) => {}
            TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, _)) => {
                let declared = module.create_func(&sig.name, IRValueType::I32);
                scope.set_function_signature(&sig.name, sig.clone(), declared)?;
            }
        }
    }

    // Pass 2: lower every statement.
    statements
        .iter()
        .try_for_each(|statement| statement.codegen(&mut module, &mut scope))?;

    Ok(module)
}
impl TopLevelStatement {
pub fn codegen(&self, module: &mut IRModule, root_data: &mut ScopeData) -> Result<(), Error> {
match self {
TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, block)) => {
if let Some((_, ir)) = root_data.function(&sig.name) {
if let Some(ir_function) = ir.take() {
let mut ir_block = module.create_block();
let mut scope = root_data.inner(&mut ir_block);
let (_, value) = match block.codegen(&mut scope)? {
Some(v) => v,
None => panic!("Void-return type function not yet implemented!"),
};
ir_function.add_definition(value, ir_block);
} else {
Err(Error::FunctionAlreadyDefined(sig.name.clone()))?
}
} else {
panic!("Function was not declared before it's definition")
}
}
TopLevelStatement::Import(_) => {}
}
Ok(())
}
}
impl Block {
    /// Lowers all statements of the block in order, then its return
    /// expression (if it has one).
    ///
    /// Returns the kind of return together with the produced value, or `None`
    /// when the block has no return expression.
    pub fn codegen(&self, scope: &mut Scope) -> Result<Option<(ReturnType, IRValue)>, Error> {
        self.0
            .iter()
            .try_for_each(|statement| statement.codegen(scope))?;

        self.1
            .as_ref()
            .map(|(rt, exp)| exp.codegen(scope).map(|value| (*rt, value)))
            .transpose()
    }
}
impl BlockLevelStatement {
    /// Lowers a single statement inside a block.
    ///
    /// `let` binds the lowered value in the scope, expression statements are
    /// lowered and their value discarded, and imports are ignored for now.
    ///
    /// # Panics
    /// Panics on a `Return` statement.
    pub fn codegen(&self, scope: &mut Scope) -> Result<(), Error> {
        match self {
            BlockLevelStatement::Import(_) => {} // TODO: To implement
            BlockLevelStatement::Return(_) => panic!("Should never happen"),
            BlockLevelStatement::Expression(e) => {
                let _value = e.codegen(scope)?;
            }
            BlockLevelStatement::Let(let_statement) => {
                let val = let_statement.1.codegen(scope)?;
                scope.data.set_var(&let_statement.0, val)?;
            }
        }
        Ok(())
    }
}
impl Expression {
/// Lowers this expression into the current block of `scope` and returns the
/// resulting IR value.
///
/// Errors come from undefined variables/functions or from the IR builder.
/// Several constructs are not implemented yet and panic instead (see the
/// individual arms).
pub fn codegen(&self, scope: &mut Scope) -> Result<IRValue, Error> {
use Expression::*;
match self {
// Only `+` and `*` are supported. The operator is matched *before* the
// operands are lowered, so an unsupported operator panics without
// emitting anything for its operands.
Binop(op, lhs, rhs) => match op {
BinaryOperator::Add => {
let lhs = lhs.codegen(scope)?;
let rhs = rhs.codegen(scope)?;
Ok(scope.block.add(lhs, rhs)?)
}
BinaryOperator::Mult => {
let lhs = lhs.codegen(scope)?;
let rhs = rhs.codegen(scope)?;
Ok(scope.block.mul(lhs, rhs)?)
}
_ => panic!("Other binary operators not supported yet!"),
},
// A block expression is lowered in a nested scope; only a soft return
// yields a value.
BlockExpr(block) => {
let mut inner = scope.inner();
Ok(match block.codegen(&mut inner)? {
Some((r_type, value)) => match r_type {
ReturnType::Soft => value,
ReturnType::Hard => {
panic!("Hard returns in inner blocks not supported yet")
}
},
None => panic!("Void-return type block not yet implemented!"),
})
}
// The call arguments (second tuple field) are ignored here; only the
// callee's signature is handed to the block.
FunctionCall(fc) => {
let FunctionCallExpression(name, _) = &**fc;
if let Some((sig, _)) = scope.data.function(name) {
Ok(scope.block.function_call(sig)?)
} else {
Err(Error::UndefinedFunction(name.clone()))?
}
}
// Variables resolve to a clone of the value bound in the scope.
VariableName(name) => scope
.data
.var(name)
.cloned()
.ok_or(Error::UndefinedVariable(name.clone())),
Literal(lit) => Ok(scope.block.get_const(lit)),
IfExpr(_) => panic!("if expressions not yet supported"),
}
}
}
/// Errors reported by the code generator.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// A variable with this name is already bound in the scope.
#[error("Variable '{0}' already defined")]
VariableAlreadyDefined(String),
/// A variable was referenced that is not bound in the scope.
#[error("Variable '{0}' not yet defined")]
UndefinedVariable(String),
/// A function was called that is not known to the scope.
#[error("Function '{0}' not defined")]
UndefinedFunction(String),
/// The function with this name already received its definition.
#[error("Function '{0}' already defined")]
FunctionAlreadyDefined(String),
/// An error from the lower-level IR layer, passed through unchanged.
#[error(transparent)]
Deeper(#[from] llvm_ir::Error),
}

283
src/codegen/llvm.rs Normal file
View File

@ -0,0 +1,283 @@
use std::borrow::BorrowMut;
use std::ffi::{CStr, CString};
use std::mem;
use std::ptr::null_mut;
use llvm_sys::analysis::LLVMVerifyModule;
use llvm_sys::{
core::*, prelude::*, LLVMBasicBlock, LLVMBuilder, LLVMContext, LLVMModule, LLVMType, LLVMValue,
};
use crate::ast;
/// Converts any string-like value into an owned, NUL-terminated [`CString`]
/// that can be handed to the LLVM C API.
///
/// A C string cannot contain interior NUL bytes. The previous unchecked
/// construction silently produced an invalid `CString` for such input; now
/// the value is cut at the first NUL byte, which is exactly what a C consumer
/// reading the pointer would have seen anyway.
fn into_cstring<T: Into<String>>(value: T) -> CString {
    match CString::new(value.into()) {
        Ok(cstring) => cstring,
        Err(err) => {
            let nul_at = err.nul_position();
            let mut bytes = err.into_vec();
            bytes.truncate(nul_at);
            // Everything from the first NUL onwards was just removed, so this
            // second attempt cannot fail.
            CString::new(bytes).expect("no NUL bytes remain after truncation")
        }
    }
}
/// Errors that can occur while emitting LLVM IR.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// The two operands of a binary operation had different [`IRType`]s
/// (left-hand type first, right-hand type second).
#[error("Type mismatch: {0:?} vs {1:?}")]
TypeMismatch(IRType, IRType),
}
/// The value types the code generator currently knows about.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum IRType {
/// Lowered to LLVM's 32-bit integer type.
I32,
/// Lowered to LLVM's 1-bit integer type; produced by comparisons.
Boolean,
}
impl IRType {
fn in_context(&self, context: &IRContext) -> *mut LLVMType {
use IRType::*;
unsafe {
return match self {
I32 => LLVMInt32TypeInContext(context.context),
Boolean => LLVMInt1TypeInContext(context.context),
};
}
}
}
/// A typed IR value: the [`IRType`] it was created with, paired with the raw
/// LLVM value pointer.
#[derive(Clone)]
pub struct IRValue(pub IRType, *mut LLVMValue);

impl IRValue {
    /// Builds the LLVM constant for an AST literal, using the context that
    /// `module` lives in.
    pub fn from_literal(literal: &ast::Literal, module: &IRModule) -> Self {
        match literal {
            ast::Literal::I32(v) => {
                let ir_type = IRType::I32;
                // `LLVMConstInt` takes the raw bits as an unsigned 64-bit
                // integer: widen with sign extension first, then reinterpret.
                // A plain cast does this without resorting to `transmute`.
                let bits = *v as i64 as u64;
                // The final `1` asks LLVM to treat `bits` as sign-extended.
                let ir_value =
                    unsafe { LLVMConstInt(ir_type.in_context(module.context), bits, 1) };
                IRValue(ir_type, ir_value)
            }
        }
    }
}
/// Owns a raw LLVM context together with the instruction builder created in
/// it. Both are released when the value is dropped.
pub struct IRContext {
    context: *mut LLVMContext,
    builder: *mut LLVMBuilder,
}

impl IRContext {
    /// Creates a fresh LLVM context and a builder inside that context.
    pub fn new() -> IRContext {
        let context = unsafe { LLVMContextCreate() };
        let builder = unsafe { LLVMCreateBuilderInContext(context) };
        IRContext { context, builder }
    }

    /// Creates a new module called `name` inside this context. The module
    /// keeps the context mutably borrowed for as long as it lives.
    pub fn module<'a>(&'a mut self, name: String) -> IRModule<'a> {
        IRModule::new(self, name)
    }
}
/// Releases the LLVM resources owned by the context.
impl Drop for IRContext {
fn drop(&mut self) {
// Clean up. Values created in the context mostly get cleaned up there.
// The builder was created inside the context, so it is disposed first.
unsafe {
LLVMDisposeBuilder(self.builder);
LLVMContextDispose(self.context);
}
}
}
pub struct IRModule<'a> {
context: &'a mut IRContext,
module: *mut LLVMModule,
}
impl<'a> IRModule<'a> {
fn new<'b: 'a>(context: &'b mut IRContext, name: String) -> IRModule<'a> {
unsafe {
let module =
LLVMModuleCreateWithNameInContext(into_cstring(name).as_ptr(), context.context);
IRModule { context, module }
}
}
pub fn print_to_string(&mut self) -> Result<&str, std::str::Utf8Error> {
unsafe {
LLVMVerifyModule(
self.module,
llvm_sys::analysis::LLVMVerifierFailureAction::LLVMPrintMessageAction,
null_mut(),
);
CStr::from_ptr(LLVMPrintModuleToString(self.module)).to_str()
}
}
}
impl<'a> Drop for IRModule<'a> {
fn drop(&mut self) {
// Clean up. Values created in the context mostly get cleaned up there.
unsafe {
LLVMDisposeModule(self.module);
}
}
}
/// A function declared inside an [`IRModule`].
pub struct IRFunction<'a, 'b> {
    pub module: &'b IRModule<'a>,
    /// The actual function
    value: *mut LLVMValue,
}

impl<'a, 'b> IRFunction<'a, 'b> {
    /// Adds a function called `name` to `module`.
    ///
    /// The signature is hard-coded for now: no parameters, 32-bit integer
    /// return type, not variadic.
    pub fn new(name: &String, module: &'b IRModule<'a>) -> IRFunction<'a, 'b> {
        let c_name = into_cstring(name);
        // TODO, fix later!
        let mut param_types = [];
        let value = unsafe {
            let return_type = LLVMInt32TypeInContext(module.context.context);
            let func_type = LLVMFunctionType(
                return_type,
                param_types.as_mut_ptr(),
                param_types.len() as u32,
                0,
            );
            LLVMAddFunction(module.module, c_name.as_ptr(), func_type)
        };
        IRFunction { module, value }
    }
}
pub struct IRBlock<'a, 'b, 'c> {
pub function: &'c IRFunction<'a, 'b>,
blockref: *mut LLVMBasicBlock,
}
impl<'a, 'b, 'c> IRBlock<'a, 'b, 'c> {
pub fn new(function: &'c IRFunction<'a, 'b>, name: &CStr) -> IRBlock<'a, 'b, 'c> {
unsafe {
let blockref =
LLVMCreateBasicBlockInContext(function.module.context.context, name.as_ptr());
IRBlock { function, blockref }
}
}
pub fn add(
&mut self,
IRValue(lhs_t, lhs_v): IRValue,
IRValue(rhs_t, rhs_v): IRValue,
) -> Result<IRValue, Error> {
unsafe {
LLVMPositionBuilderAtEnd(self.function.module.context.builder, self.blockref);
if lhs_t == rhs_t {
Ok(IRValue(
lhs_t,
LLVMBuildAdd(
self.function.module.context.builder,
lhs_v,
rhs_v,
c"tmpadd".as_ptr(),
),
))
} else {
Err(Error::TypeMismatch(lhs_t, rhs_t))
}
}
}
pub fn mult(
&mut self,
IRValue(lhs_t, lhs_v): IRValue,
IRValue(rhs_t, rhs_v): IRValue,
) -> Result<IRValue, Error> {
unsafe {
LLVMPositionBuilderAtEnd(self.function.module.context.builder, self.blockref);
if lhs_t == rhs_t {
Ok(IRValue(
lhs_t,
LLVMBuildMul(
self.function.module.context.builder,
lhs_v,
rhs_v,
c"tmpadd".as_ptr(),
),
))
} else {
Err(Error::TypeMismatch(lhs_t, rhs_t))
}
}
}
pub fn less_than(
&mut self,
IRValue(lhs_t, lhs_v): IRValue,
IRValue(rhs_t, rhs_v): IRValue,
) -> Result<IRValue, Error> {
unsafe {
LLVMPositionBuilderAtEnd(self.function.module.context.builder, self.blockref);
if lhs_t == rhs_t {
Ok(IRValue(
IRType::Boolean,
LLVMBuildICmp(
self.function.module.context.builder,
llvm_sys::LLVMIntPredicate::LLVMIntULT,
lhs_v,
rhs_v,
c"IntULT".as_ptr(),
),
))
} else {
Err(Error::TypeMismatch(lhs_t, rhs_t))
}
}
}
pub fn add_return(&mut self, value: Option<IRValue>) {
unsafe {
LLVMPositionBuilderAtEnd(self.function.module.context.builder, self.blockref);
if let Some(IRValue(_, value)) = value {
LLVMBuildRet(self.function.module.context.builder, value);
} else {
LLVMBuildRetVoid(self.function.module.context.builder);
}
}
}
pub fn branch(
&mut self,
IRValue(_, condition): IRValue,
then_block: &mut IRBlock,
else_block: &mut IRBlock,
) {
unsafe {
LLVMPositionBuilderAtEnd(self.function.module.context.builder, self.blockref);
LLVMBuildCondBr(
self.function.module.context.builder,
condition,
then_block.blockref,
else_block.blockref,
);
}
}
pub fn move_into(&mut self, block: &mut IRBlock) {
unsafe {
LLVMPositionBuilderAtEnd(self.function.module.context.builder, self.blockref);
LLVMBuildBr(self.function.module.context.builder, block.blockref);
}
}
}
/// Dropping an `IRBlock` appends its basic block to the owning function, so
/// the order in which blocks are dropped is the order in which they are
/// appended.
impl<'a, 'b, 'c> Drop for IRBlock<'a, 'b, 'c> {
fn drop(&mut self) {
unsafe {
LLVMAppendExistingBasicBlock(self.function.value, self.blockref);
}
}
}

176
src/codegen/mod.rs Normal file
View File

@ -0,0 +1,176 @@
mod llvm;
use std::collections::HashMap;
use llvm::{Error, IRBlock, IRContext, IRFunction, IRModule, IRValue};
use crate::{
ast::{
Block, BlockLevelStatement, Expression, ExpressionKind, FunctionDefinition, IfExpression,
LetStatement, ReturnType,
},
TopLevelStatement,
};
/// Creates the [`IRContext`] that modules are later generated into (see
/// [`from_statements`]).
pub fn form_context() -> IRContext {
IRContext::new()
}
/// Generates a module named `testmod` inside `context` from the parsed
/// top-level statements, lowering them in source order.
///
/// The `Result` is currently always `Ok`; failures inside code generation
/// surface as panics.
pub fn from_statements(
    context: &mut IRContext,
    statements: Vec<TopLevelStatement>,
) -> Result<IRModule, Error> {
    let mut module = context.module(String::from("testmod"));
    let mut root_scope = ScopeData::new();
    statements
        .iter()
        .for_each(|statement| statement.codegen(&mut root_scope, &mut module));
    Ok(module)
}
impl TopLevelStatement {
/// Lowers one top-level statement into `module`.
///
/// Only function definitions are supported; an import panics.
fn codegen(&self, scope: &mut ScopeData, module: &mut IRModule) {
match self {
Self::FunctionDefinition(func) => func.codegen(scope, module),
Self::Import(_) => panic!("not implemented"),
}
}
}
impl FunctionDefinition {
    /// Declares the function in `module`, lowers its body into an `entry`
    /// block and terminates whichever block is current afterwards with a
    /// return: of the body's value if it produced one, void otherwise.
    fn codegen(&self, scope: &mut ScopeData, module: &mut IRModule) {
        let FunctionDefinition(signature, body, _) = self;

        let function = IRFunction::new(&signature.name, module);
        let entry = IRBlock::new(&function, c"entry");
        let mut fn_scope = scope.inner(entry);

        let return_value = body.codegen(&mut fn_scope).map(|(_, value)| value);
        fn_scope.block.add_return(return_value);
    }
}
impl Block {
#[must_use]
fn codegen(&self, scope: &mut Scope) -> Option<(ReturnType, IRValue)> {
for statement in &self.0 {
statement.codegen(scope);
}
if let Some((ret_type, return_exp)) = &self.1 {
let value = return_exp.codegen(scope);
Some((*ret_type, value))
} else {
None
}
}
}
impl BlockLevelStatement {
fn codegen(&self, scope: &mut Scope) {
use BlockLevelStatement::*;
match self {
Expression(exp) | Return(ReturnType::Soft, exp) => {
exp.codegen(scope);
}
Let(LetStatement(name, exp, _)) => {
let val = exp.codegen(scope);
scope.data.insert(name, val);
}
Return(ReturnType::Hard, _) => panic!("hard returns here should not be possible.."),
Import(_) => panic!("block level import not supported"),
}
}
}
impl Expression {
/// Lowers this expression into the current block of `scope` and returns the
/// resulting value.
///
/// Panics on unsupported operators, block expressions, function calls and
/// on operand type mismatches (the builder results are unwrapped).
fn codegen(&self, scope: &mut Scope) -> IRValue {
let Expression(kind, _) = self;
use ExpressionKind::*;
match kind {
Literal(lit) => IRValue::from_literal(lit, &scope.block.function.module),
VariableName(v) => scope.data.fetch(v),
Binop(op, lhs, rhs) => {
// Both operands are lowered before the operator is inspected, so an
// unsupported operator panics only after its operands were emitted.
let lhs = lhs.codegen(scope);
let rhs = rhs.codegen(scope);
use crate::ast::BinaryOperator::*;
match op {
Add => scope.block.add(lhs, rhs).unwrap(),
Mult => scope.block.mult(lhs, rhs).unwrap(),
LessThan => scope.block.less_than(lhs, rhs).unwrap(),
_ => panic!("operator not supported: {:?}", op),
}
}
IfExpr(ifx) => {
let IfExpression(expr, block, _) = ifx.as_ref();
let condition = expr.codegen(scope);
let mut thenb = IRBlock::new(scope.block.function, c"then");
let mut afterb = IRBlock::new(scope.block.function, c"merge");
// The current block ends with the conditional branch; "merge" doubles
// as the else-target because there is no else-branch here.
scope.block.branch(condition, &mut thenb, &mut afterb);
// Code after the `if` continues in the merge block. Assigning drops
// the previous block value.
scope.block = afterb;
// The body is lowered in its own scope (a copy of the current names),
// so bindings made inside it do not leak out.
let mut then = scope.inner(thenb);
match block.codegen(&mut then) {
Some((ReturnType::Hard, v)) => then.block.add_return(Some(v)),
_ => then.block.move_into(&mut scope.block),
}
// The `if` expression itself evaluates to the constant 1 for now.
IRValue::from_literal(&crate::ast::Literal::I32(1), scope.block.function.module)
}
BlockExpr(_) => panic!("block expr not supported"),
FunctionCall(_) => panic!("function call expr not supported"),
}
}
}
/// The variables visible at some point during code generation.
#[derive(Clone)]
struct ScopeData {
    vars: HashMap<String, IRValue>,
}

impl ScopeData {
    /// Creates scope data without any variables.
    fn new() -> ScopeData {
        let vars = HashMap::new();
        ScopeData { vars }
    }

    /// Turns this data into a [`Scope`] that emits into `block`.
    fn with_block<'a, 'b, 'c>(self, block: IRBlock<'a, 'b, 'c>) -> Scope<'a, 'b, 'c> {
        Scope { data: self, block }
    }

    /// Creates a [`Scope`] for `block` that starts with a copy of the
    /// variables known here.
    fn inner<'a, 'b, 'c>(&self, block: IRBlock<'a, 'b, 'c>) -> Scope<'a, 'b, 'c> {
        let copy = self.clone();
        copy.with_block(block)
    }

    /// Returns a clone of the value bound to `name`.
    ///
    /// # Panics
    /// Panics if no such variable exists.
    fn fetch(&self, name: &String) -> IRValue {
        self.vars
            .get(name)
            .cloned()
            .unwrap_or_else(|| panic!("No such variable in scope: {}", name))
    }

    /// Binds `name` to `value`.
    ///
    /// # Panics
    /// Panics if `name` was already bound.
    fn insert(&mut self, name: &String, value: IRValue) {
        if self.vars.insert(name.clone(), value).is_some() {
            panic!("{} was already defined in scope", name);
        }
    }
}
/// The block currently being emitted into, together with the variables
/// visible while doing so.
struct Scope<'a, 'b, 'c> {
    data: ScopeData,
    block: IRBlock<'a, 'b, 'c>,
}

impl<'a, 'b, 'c> Scope<'a, 'b, 'c> {
    /// Creates a nested scope that emits into `block` and starts with a copy
    /// of this scope's variables.
    fn inner(&self, block: IRBlock<'a, 'b, 'c>) -> Scope<'a, 'b, 'c> {
        Scope {
            data: self.data.clone(),
            block,
        }
    }
}

View File

@ -1,12 +1,11 @@
use crate::{
ast::TopLevelStatement, codegen::codegen_from_statements, lexer::Token,
token_stream::TokenStream,
};
use codegen::{form_context, from_statements};
use crate::{ast::TopLevelStatement, lexer::Token, token_stream::TokenStream};
mod ast;
mod codegen;
mod lexer;
mod llvm_ir;
// mod llvm_ir;
mod token_stream;
// TODO:
@ -21,8 +20,8 @@ pub enum ReidError {
LexerError(#[from] lexer::Error),
#[error(transparent)]
ParserError(#[from] token_stream::Error),
#[error(transparent)]
CodegenError(#[from] codegen::Error),
// #[error(transparent)]
// CodegenError(#[from] codegen::Error),
}
pub fn compile(source: &str) -> Result<String, ReidError> {
@ -40,7 +39,8 @@ pub fn compile(source: &str) -> Result<String, ReidError> {
statements.push(statement);
}
let mut module = codegen_from_statements(statements)?;
let mut context = form_context();
let mut module = from_statements(&mut context, statements).unwrap();
let text = module.print_to_string().unwrap();
Ok(text.to_owned())
}

View File

@ -1,203 +0,0 @@
use std::ffi::{CStr, CString};
use std::mem;
use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule};
use crate::ast::{FunctionSignature, Literal};
/// Turns a string literal into a `*const c_char` pointing at a static,
/// NUL-terminated copy of it (the terminator is appended at compile time).
///
/// The expansion calls an `unsafe` function, so the macro has to be used
/// inside an `unsafe` block, and the literal must not contain NUL bytes.
macro_rules! cstr {
($string:expr) => {
core::ffi::CStr::from_bytes_with_nul_unchecked(concat!($string, "\0").as_bytes()).as_ptr()
};
}
/// A typed IR value: its [`IRValueType`] paired with the raw LLVM value.
#[derive(Clone, Debug)]
#[must_use = "value contains raw pointer and must be inserted somewhere"]
pub struct IRValue(IRValueType, LLVMValueRef);
/// The value types known to the IR layer.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum IRValueType {
    I32,
}

impl IRValueType {
    /// Returns the LLVM type for this value type in the context of `module`.
    ///
    /// # Safety
    /// Calls into LLVM with the raw context pointer stored in `module`.
    unsafe fn get_llvm_type(&self, module: &mut IRModule) -> LLVMTypeRef {
        let context = module.context;
        match self {
            IRValueType::I32 => LLVMInt32TypeInContext(context),
        }
    }
}
/// Converts any string-like value into an owned, NUL-terminated [`CString`]
/// for use with the LLVM C API.
///
/// Input containing an interior NUL byte is cut at that byte instead of being
/// turned into an invalid `CString` through the unchecked constructor; a C
/// consumer would have stopped reading there in any case.
fn into_cstring<T: Into<String>>(value: T) -> CString {
    CString::new(value.into()).unwrap_or_else(|err| {
        let nul_at = err.nul_position();
        let mut bytes = err.into_vec();
        bytes.truncate(nul_at);
        // All bytes from the first NUL onwards are gone, so this cannot fail.
        CString::new(bytes).expect("no NUL bytes remain after truncation")
    })
}
pub struct IRModule {
context: *mut LLVMContext,
module: *mut LLVMModule,
builder: *mut LLVMBuilder,
}
impl IRModule {
pub fn new<T: Into<String>>(name: T) -> IRModule {
unsafe {
// Set up a context, module and builder in that context.
let context = LLVMContextCreate();
let module = LLVMModuleCreateWithNameInContext(into_cstring(name).as_ptr(), context);
let builder = LLVMCreateBuilderInContext(context);
IRModule {
context,
module,
builder,
}
}
}
pub fn create_block(&mut self) -> IRBlock {
IRBlock::create("entry", self)
}
pub fn create_func<T: Into<String>>(
&mut self,
name: T,
return_type: IRValueType,
) -> IRFunction {
unsafe {
let mut argts = [];
let func_type = LLVMFunctionType(
return_type.get_llvm_type(self),
argts.as_mut_ptr(),
argts.len() as u32,
0,
);
let anon_func = LLVMAddFunction(self.module, into_cstring(name).as_ptr(), func_type);
IRFunction {
value: IRValue(return_type, anon_func),
}
}
}
pub fn print_to_string(&mut self) -> Result<&str, std::str::Utf8Error> {
unsafe { CStr::from_ptr(LLVMPrintModuleToString(self.module)).to_str() }
}
}
impl Drop for IRModule {
fn drop(&mut self) {
// Clean up. Values created in the context mostly get cleaned up there.
unsafe {
LLVMDisposeBuilder(self.builder);
LLVMDisposeModule(self.module);
LLVMContextDispose(self.context);
}
}
}
/// A function declared in an [`IRModule`]; wraps the function's typed value.
#[derive(Clone, Debug)]
pub struct IRFunction {
value: IRValue,
}
impl IRFunction {
/// Gives the function its body: appends `block` to the function and then
/// builds a return of `ret` with the module's builder. Consumes the function
/// handle, so a definition can only be added once per handle.
pub fn add_definition(self, ret: IRValue, block: IRBlock) {
unsafe {
LLVMAppendExistingBasicBlock(self.value.1, block.blockref);
LLVMBuildRet(block.module.builder, ret.1);
}
}
}
pub struct IRBlock<'a> {
module: &'a mut IRModule,
blockref: LLVMBasicBlockRef,
}
impl<'a> IRBlock<'a> {
fn create<T: Into<String>>(name: T, codegen: &'a mut IRModule) -> IRBlock<'a> {
unsafe {
let blockref =
LLVMCreateBasicBlockInContext(codegen.context, into_cstring(name).as_ptr());
LLVMPositionBuilderAtEnd(codegen.builder, blockref);
IRBlock {
module: codegen,
blockref,
}
}
}
pub fn get_const(&mut self, literal_type: &Literal) -> IRValue {
unsafe {
match *literal_type {
Literal::I32(v) => IRValue(
IRValueType::I32,
LLVMConstInt(
LLVMInt32TypeInContext(self.module.context),
mem::transmute(v as i64),
1,
),
),
}
}
}
pub fn add(&mut self, lhs: IRValue, rhs: IRValue) -> Result<IRValue, Error> {
unsafe {
if lhs.0 == rhs.0 {
Ok(IRValue(
lhs.0,
LLVMBuildAdd(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")),
))
} else {
Err(Error::TypeMismatch(lhs.0, rhs.0))
}
}
}
pub fn mul(&mut self, lhs: IRValue, rhs: IRValue) -> Result<IRValue, Error> {
unsafe {
if lhs.0 == rhs.0 {
Ok(IRValue(
lhs.0,
LLVMBuildMul(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")),
))
} else {
Err(Error::TypeMismatch(lhs.0, rhs.0))
}
}
}
pub fn function_call(&mut self, callee: &FunctionSignature) -> Result<IRValue, Error> {
unsafe {
let function = LLVMGetNamedFunction(
self.module.module,
into_cstring(callee.name.clone()).as_ptr(),
);
let ret_t = LLVMInt32TypeInContext(self.module.context);
let mut argts = [];
let mut args = [];
let fun_t = LLVMFunctionType(ret_t, argts.as_mut_ptr(), argts.len() as u32, 0);
let call = LLVMBuildCall2(
self.module.builder,
fun_t,
function,
args.as_mut_ptr(),
args.len() as u32,
into_cstring(&callee.name).as_ptr(),
);
Ok(IRValue(IRValueType::I32, call))
}
}
}
/// Errors reported by the IR layer.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// The two operands of a binary operation had different value types
/// (left-hand type first, right-hand type second).
#[error("Type Mismatch: {0:?} {1:?}")]
TypeMismatch(IRValueType, IRValueType),
}

View File

@ -57,7 +57,61 @@ impl<'a, 'b> TokenStream<'a, 'b> {
}
}
pub fn parse<T: Parse>(&mut self) -> Result<T, Error> {
/// Parses the next `T` from the stream.
///
/// On success the tokens belonging to the parsed item are consumed; on
/// failure the stream position is left unchanged and the parse error is
/// returned.
pub fn parse<T: Parse + std::fmt::Debug>(&mut self) -> Result<T, Error> {
    self.parse_meta().map(|(parsed, new_position)| {
        self.position = new_position;
        parsed
    })
}
/// Parses the next `T` like [`TokenStream::parse`], but never consumes any
/// tokens: the stream position stays where it was, so this only peeks.
pub fn parse_peek<T: Parse + std::fmt::Debug>(&mut self) -> Result<T, Error> {
    let (parsed, _) = self.parse_meta()?;
    Ok(parsed)
}
/// Parses the next `T` and passes it through `inner`.
///
/// The tokens are consumed only if `inner` returns `Ok`; if either the parse
/// or `inner` fails, the stream position is left unchanged and the error is
/// returned.
pub fn parse_map<T: Parse + std::fmt::Debug, F, O>(&mut self, inner: F) -> Result<O, Error>
where
    F: Fn(T) -> Result<O, Error>,
{
    let (parsed, new_position) = self.parse_meta::<T>()?;
    let mapped = inner(parsed)?;
    self.position = new_position;
    Ok(mapped)
}
/// Parses the item with Parse if the condition specified by the
/// lambda-function is passed. Errors returned from this should not be
/// passed to the end-user.
pub fn parse_if<T: Parse + std::fmt::Debug, F>(&mut self, inner: F) -> Result<T, Error>
where
F: Fn(&T) -> bool,
{
let (res, pos) = self.parse_meta::<T>()?;
if inner(&res) {
self.position = pos;
Ok(res)
} else {
Err(Error::IfFailed)
}
}
/// Parse the next item with Parse-trait. If successful, returning the
/// parsed item and the new position of the TokenStream. Failing, returning
/// parse-error.
///
/// Used for [`TokenStream::parse`] and [`TokenStream::parse_peek`]
fn parse_meta<T: Parse + std::fmt::Debug>(&mut self) -> Result<(T, usize), Error> {
let mut ref_pos = self.position;
let position = self.position;
@ -69,8 +123,9 @@ impl<'a, 'b> TokenStream<'a, 'b> {
match T::parse(clone) {
Ok(res) => {
self.position = ref_pos.max(self.position);
Ok(res)
dbg!(&res);
let new_pos = ref_pos.max(self.position);
Ok((res, new_pos))
}
Err(e) => Err(e),
}
@ -84,6 +139,13 @@ impl<'a, 'b> TokenStream<'a, 'b> {
Ok(self.tokens[token_idx].position)
}
}
/// Returns the token range from the referenced position up to this stream's
/// current position, or `None` when the stream has no reference position.
pub fn get_range(&self) -> Option<TokenRange> {
    let ref_pos = self.ref_position.as_ref()?;
    Some(TokenRange {
        start: **ref_pos,
        end: self.position,
    })
}
}
impl Drop for TokenStream<'_, '_> {
@ -94,10 +156,28 @@ impl Drop for TokenStream<'_, '_> {
}
}
/// A span of tokens, given as a start and an end token position.
#[derive(Clone)]
pub struct TokenRange {
    pub start: usize,
    pub end: usize,
}

/// Compact debug form: `Tokens[<start> - <end>]`.
impl std::fmt::Debug for TokenRange {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { start, end } = self;
        f.write_fmt(format_args!("Tokens[{} - {}]", start, end))
    }
}
/// Errors produced while parsing a token stream.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// Something else was expected at a position: a description of what was
/// expected, the token that was found instead, and where. The message
/// prints the second element of the position as line and the first as
/// column.
#[error("Expected {} at Ln {}, Col {}, got {:?}", .0, (.2).1, (.2).0, .1)]
Expected(String, Token, Position),
/// There were no tokens at all.
#[error("Source file contains no tokens")]
FileEmpty,
/// Only use this error in situations where the error never ends up for the end-user!
#[error("Undefined error, should only be used in situations where the error is not emitted!")]
Undefined,
/// Condition failed for the parse-if
#[error("Condition failed for parse-if. Should never be returned to end-user.")]
IfFailed,
}