Add preliminary codegen, update parsing to require functions

This commit is contained in:
Sofia 2023-08-02 15:31:33 +03:00
parent 17ab6a0cea
commit dca604a038
6 changed files with 356 additions and 114 deletions

3
.gitignore vendored
View File

@ -1,2 +1,3 @@
/target /target
/.vscode /.vscode
.env

View File

@ -1,4 +1,8 @@
// Hello, comment here! // Hello, comment here!
let hello = 32;
let beep = fn main() {
hello ; let hello = 32;
let beep =
hello ;
return beep;
}

112
src/codegen.rs Normal file
View File

@ -0,0 +1,112 @@
use std::mem;
use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule};
use crate::parser::Literal;
/// Turns a string literal into a `*const c_char` pointing at a NUL-terminated
/// copy of that literal.
///
/// The terminator is appended at compile time via `concat!`, so the argument
/// has to be a literal. A NUL byte inside the literal is not detected.
macro_rules! cstr {
    ($literal:expr) => {
        concat!($literal, "\0").as_ptr().cast::<core::ffi::c_char>()
    };
}
/// Value types the code generator knows how to lower to an LLVM type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValueType {
    /// Lowered to LLVM's 32-bit integer type.
    I32,
}

impl ValueType {
    /// Returns the LLVM type for this value type, looked up in the LLVM
    /// context owned by `codegen`.
    ///
    /// The generator is only read, so a shared borrow is sufficient; callers
    /// holding a `&mut CodeGenerator` can still pass it unchanged.
    ///
    /// # Safety
    ///
    /// `codegen.context` is handed straight to the LLVM C API, so it must be
    /// a live context that has not been disposed yet.
    unsafe fn get_llvm_type(&self, codegen: &CodeGenerator) -> LLVMTypeRef {
        match self {
            Self::I32 => LLVMInt32TypeInContext(codegen.context),
        }
    }
}
/// A typed LLVM value: the language-level [`ValueType`] (field `0`) paired
/// with the raw `LLVMValueRef` handle it describes (field `1`).
///
/// Marked `must_use` because dropping it silently discards the handle.
#[must_use = "value contains raw pointer and must be inserted somewhere"]
pub struct Value(ValueType, LLVMValueRef);
pub struct CodeGenerator {
context: *mut LLVMContext,
module: *mut LLVMModule,
builder: *mut LLVMBuilder,
}
impl CodeGenerator {
pub fn new() -> CodeGenerator {
unsafe {
// Set up a context, module and builder in that context.
let context = LLVMContextCreate();
let module = LLVMModuleCreateWithNameInContext(cstr!("testmodule"), context);
let builder = LLVMCreateBuilderInContext(context);
CodeGenerator {
context,
module,
builder,
}
}
}
pub fn get_const(&mut self, literal_type: &Literal) -> Value {
unsafe {
match *literal_type {
Literal::I32(v) => Value(
ValueType::I32,
LLVMConstInt(
LLVMInt32TypeInContext(self.context),
mem::transmute(v as i64),
1,
),
),
}
}
}
pub fn add(&mut self, lhs: Value, rhs: Value) -> Result<Value, ()> {
unsafe {
if lhs.0 == rhs.0 {
Ok(Value(
lhs.0,
LLVMBuildAdd(self.builder, lhs.1, rhs.1, cstr!("tmpadd")),
))
} else {
Err(())
}
}
}
pub fn create_func(&mut self, ret: Value) {
unsafe {
let mut argts = [];
let func_type = LLVMFunctionType(
ret.0.get_llvm_type(self),
argts.as_mut_ptr(),
argts.len() as u32,
0,
);
let anon_func = LLVMAddFunction(self.module, cstr!("_anon_func"), func_type);
// Create a basic block in the function and set our builder to generate
// code in it.
let bb = LLVMAppendBasicBlockInContext(self.context, anon_func, cstr!("entry"));
LLVMPositionBuilderAtEnd(self.builder, bb);
// Emit a `ret i64` into the function to return the computed sum.
LLVMBuildRet(self.builder, ret.1);
}
}
}
impl Drop for CodeGenerator {
    /// Tears the generator down: disposes the builder, dumps the module,
    /// then disposes the module and finally the context.
    fn drop(&mut self) {
        let (context, module, builder) = (self.context, self.module, self.builder);

        // Values created inside the context are mostly released together
        // with it, so only the three top-level handles are disposed here.
        unsafe {
            LLVMDisposeBuilder(builder);
            LLVMDumpModule(module);
            LLVMDisposeModule(module);
            LLVMContextDispose(context);
        }
    }
}

View File

@ -2,31 +2,123 @@ use std::{fmt::Debug, iter::Peekable, str::Chars};
static DECIMAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']; static DECIMAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
/// Every kind of token the lexer can emit.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum Token {
    // Values
    /// A name, carried as its source text.
    Identifier(String),
    /// A numeric literal, carried as its source text.
    // NOTE(review): this was documented as "at most one decimal point";
    // confirm the lexer actually accepts a `.` inside numbers.
    DecimalValue(String),

    // Keywords
    /// The `let` keyword.
    LetKeyword,
    /// The `import` keyword.
    ImportKeyword,
    /// The `return` keyword.
    ReturnKeyword,
    /// The `fn` keyword.
    FnKeyword,

    // Symbols
    /// The `;` symbol.
    Semi,
    /// The `=` symbol.
    Equals,
    /// The `:` symbol.
    Colon,
    /// The `+` symbol.
    Plus,
    /// The `*` symbol.
    Times,
    /// The `(` symbol.
    ParenOpen,
    /// The `)` symbol.
    ParenClose,
    /// The `{` symbol.
    BraceOpen,
    /// The `}` symbol.
    BraceClose,
    /// The `,` symbol.
    Comma,

    /// End-of-input marker.
    Eof,
}

impl Token {
    /// Precedence of this token when used as a binary operator.
    ///
    /// `*` yields 20 and `+` yields 10; any other token yields `-1`.
    pub fn get_token_prec(&self) -> i8 {
        match self {
            Self::Times => 20,
            Self::Plus => 10,
            _ => -1,
        }
    }
}
/// A [`Token`] together with the [`Position`] recorded for it.
#[derive(Clone)]
pub struct FullToken {
    pub token: Token,
    pub position: Position,
}

impl Debug for FullToken {
    /// Renders as `<token> (Ln <line>, Col <column>)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `Position` stores the column first and the line second.
        let (column, line) = self.position;
        write!(f, "{:?} (Ln {}, Col {})", self.token, line, column)
    }
}
/// Source position stored as `(column, line)`.
pub type Position = (u32, u32);

// NOTE(review): not referenced by the code visible here; presumably meant as
// an end-of-input sentinel. Confirm it is still needed.
const EOF_CHAR: char = '\0';

/// Character reader over the source text that tracks its own [`Position`].
pub struct Cursor<'a> {
    pub position: Position,
    char_stream: Chars<'a>,
}

impl<'a> Cursor<'a> {
    /// Consumes and returns the next character, updating `position`.
    ///
    /// The column is bumped on every call, including calls that return
    /// `None`. A newline then bumps the line and resets the column to 0.
    fn next(&mut self) -> Option<char> {
        let consumed = self.char_stream.next();
        self.position.0 += 1;
        if consumed == Some('\n') {
            self.position.1 += 1;
            self.position.0 = 0;
        }
        consumed
    }

    /// Returns the next character without consuming it.
    ///
    /// Works on a clone of the iterator, so a shared borrow is enough.
    fn first(&self) -> Option<char> {
        // `.next()` optimizes better than `.nth(0)`
        self.char_stream.clone().next()
    }

    /// Returns the character after the next one without consuming anything.
    ///
    /// Works on a clone of the iterator, so a shared borrow is enough.
    fn second(&self) -> Option<char> {
        // `.next()` optimizes better than `.nth(1)`
        let mut lookahead = self.char_stream.clone();
        lookahead.next();
        lookahead.next()
    }
}
pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, String> { pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, String> {
let to_tokenize = to_tokenize.into(); let to_tokenize = to_tokenize.into();
let mut position = (0, 1); let mut position = (0, 1);
let mut cursor = Cursor { let mut cursor = Cursor {
char_stream: to_tokenize.chars().peekable(), char_stream: to_tokenize.chars(),
position, position,
}; };
let mut tokens = Vec::new(); let mut tokens = Vec::new();
while let Some(character) = &cursor.next() { while let Some(character) = &cursor.next() {
position.0 += 1;
if *character == '\n' {
position.1 += 1;
position.0 = 0;
}
let peek = cursor.peek();
let variant = match character { let variant = match character {
// Whitespace // Whitespace
w if w.is_whitespace() => continue, w if w.is_whitespace() => continue,
// Comments // Comments
'/' if peek == Some(&'/') => { '/' if cursor.first() == Some('/') => {
while !matches!(&cursor.peek(), Some('\n')) { while !matches!(cursor.first(), Some('\n')) {
cursor.next(); cursor.next();
} }
continue; continue;
@ -34,7 +126,7 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
// "words" // "words"
c if c.is_alphabetic() => { c if c.is_alphabetic() => {
let mut value = character.to_string(); let mut value = character.to_string();
while let Some(c) = &cursor.peek() { while let Some(c) = cursor.first() {
if !c.is_ascii_alphanumeric() { if !c.is_ascii_alphanumeric() {
break; break;
} }
@ -46,6 +138,8 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
let variant = match value.as_str() { let variant = match value.as_str() {
"let" => Token::LetKeyword, "let" => Token::LetKeyword,
"import" => Token::ImportKeyword, "import" => Token::ImportKeyword,
"return" => Token::ReturnKeyword,
"fn" => Token::FnKeyword,
_ => Token::Identifier(value), _ => Token::Identifier(value),
}; };
variant variant
@ -53,8 +147,8 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
// Decimals // Decimals
c if DECIMAL_NUMERICS.contains(c) => { c if DECIMAL_NUMERICS.contains(c) => {
let mut value = character.to_string(); let mut value = character.to_string();
while let Some(c) = &cursor.peek() { while let Some(c) = cursor.first() {
if !DECIMAL_NUMERICS.contains(c) { if !DECIMAL_NUMERICS.contains(&c) {
break; break;
} }
value += &c.to_string(); value += &c.to_string();
@ -64,12 +158,14 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
} }
// Single character tokens // Single character tokens
'=' => Token::Equals, '=' => Token::Equals,
';' => Token::Semicolon, ';' => Token::Semi,
':' => Token::Colon, ':' => Token::Colon,
'+' => Token::Plus, '+' => Token::Plus,
'*' => Token::Times, '*' => Token::Times,
'(' => Token::ParenOpen, '(' => Token::ParenOpen,
')' => Token::ParenClose, ')' => Token::ParenClose,
'{' => Token::BraceOpen,
'}' => Token::BraceClose,
',' => Token::Comma, ',' => Token::Comma,
// Invalid token // Invalid token
_ => Err(format!( _ => Err(format!(
@ -93,74 +189,3 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
Ok(tokens) Ok(tokens)
} }
/// Every kind of token the lexer can emit.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum Token {
    // Values
    /// A name, carried as its source text.
    Identifier(String),
    /// A numeric literal, carried as its source text.
    // NOTE(review): this was documented as "at most one decimal point";
    // confirm the lexer actually accepts a `.` inside numbers.
    DecimalValue(String),

    // Keywords
    /// The `let` keyword.
    LetKeyword,
    /// The `import` keyword.
    ImportKeyword,

    // Symbols
    /// The `;` symbol.
    Semicolon,
    /// The `=` symbol.
    Equals,
    /// The `:` symbol.
    Colon,
    /// The `+` symbol.
    Plus,
    /// The `*` symbol.
    Times,
    /// The `(` symbol.
    ParenOpen,
    /// The `)` symbol.
    ParenClose,
    /// The `,` symbol.
    Comma,

    /// End-of-input marker.
    Eof,
}

impl Token {
    /// Precedence of this token when used as a binary operator.
    ///
    /// `*` yields 20 and `+` yields 10; any other token yields `-1`.
    pub fn get_token_prec(&self) -> i8 {
        match self {
            Self::Times => 20,
            Self::Plus => 10,
            _ => -1,
        }
    }
}
/// A [`Token`] together with the [`Position`] recorded for it.
pub struct FullToken {
    pub token: Token,
    position: Position,
}

impl Debug for FullToken {
    /// Renders as `<token> (Ln <line>, Col <column>)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `Position` stores the column first and the line second.
        let (column, line) = self.position;
        write!(f, "{:?} (Ln {}, Col {})", self.token, line, column)
    }
}
/// Source position stored as `(column, line)`.
pub type Position = (u32, u32);

/// Character reader over the source text that tracks its own [`Position`].
pub struct Cursor<'a> {
    pub position: Position,
    char_stream: Peekable<Chars<'a>>,
}

impl<'a> Cursor<'a> {
    /// Consumes and returns the next character, updating `position`.
    ///
    /// The column is bumped on every call, including calls that return
    /// `None`. A newline then bumps the line and resets the column to 0.
    fn next(&mut self) -> Option<char> {
        let consumed = self.char_stream.next();
        self.position.0 += 1;
        if consumed == Some('\n') {
            self.position.1 += 1;
            self.position.0 = 0;
        }
        consumed
    }

    /// Returns a reference to the next character without consuming it.
    fn peek(&mut self) -> Option<&char> {
        let stream = &mut self.char_stream;
        stream.peek()
    }
}

View File

@ -1,24 +1,40 @@
use crate::{lexer::Token, parser::TopLevelStatement, token_stream::TokenStream}; use crate::{
codegen::CodeGenerator, lexer::Token, parser::TopLevelStatement, token_stream::TokenStream,
};
pub static EASIEST: &str = include_str!("../reid/easiest.reid"); pub static EASIEST: &str = include_str!("../reid/easiest.reid");
pub static EASY: &str = include_str!("../reid/easy.reid"); pub static EASY: &str = include_str!("../reid/easy.reid");
pub static MEDIUM: &str = include_str!("../reid/medium.reid"); pub static MEDIUM: &str = include_str!("../reid/medium.reid");
pub static HARD: &str = include_str!("../reid/hard.reid"); pub static HARD: &str = include_str!("../reid/hard.reid");
mod codegen;
mod lexer; mod lexer;
mod parser; mod parser;
mod token_stream; mod token_stream;
// TODO:
// 1. Make it so that TopLevelStatement can only be import or function def
// 2. Make BlockLevelStatement, that has everything TopLevelStatement has now
// 3. Make it so all codegen is done with a Block-struct, that represents a
// single proper block
fn main() { fn main() {
let tokens = lexer::tokenize(EASY).unwrap(); let tokens = lexer::tokenize(EASIEST).unwrap();
dbg!(&tokens); dbg!(&tokens);
let mut token_stream = TokenStream::from(&tokens); let mut token_stream = TokenStream::from(&tokens);
while let Ok(statement) = token_stream.parse::<TopLevelStatement>() { while !matches!(token_stream.peek().unwrap_or(Token::Eof), Token::Eof) {
let statement = token_stream.parse::<TopLevelStatement>().unwrap();
dbg!(&statement); dbg!(&statement);
} }
dbg!(token_stream.expect(Token::Eof).ok()); let mut c = CodeGenerator::new();
let x = c.get_const(&parser::Literal::I32(3));
let y = c.get_const(&parser::Literal::I32(4));
let add = c.add(x, y).unwrap();
c.create_func(add);
// dbg!(token_stream.expect(Token::Eof).ok());
} }

View File

@ -7,12 +7,22 @@ where
fn parse(stream: TokenStream) -> Result<Self, ()>; fn parse(stream: TokenStream) -> Result<Self, ()>;
} }
/// Literal values that can appear in an expression.
#[derive(Debug, Clone)]
pub enum Literal {
/// A 32-bit signed integer literal.
I32(i32),
}
/// Operator kinds carried by a binary-operation expression.
#[derive(Debug, Clone)]
pub enum BinaryOperator {
/// Produced for the `+` token.
Add,
/// Produced for the `*` token.
Mult,
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum Expression { pub enum Expression {
VariableName(String), VariableName(String),
ContantI32(i32), Literal(Literal),
BinopAdd(Box<Expression>, Box<Expression>), Binop(BinaryOperator, Box<Expression>, Box<Expression>),
BinopMult(Box<Expression>, Box<Expression>),
FunctionCall(Box<FunctionCallExpression>), FunctionCall(Box<FunctionCallExpression>),
} }
@ -29,7 +39,7 @@ fn parse_primary_expression(stream: &mut TokenStream) -> Result<Expression, ()>
} else if let Some(token) = stream.next() { } else if let Some(token) = stream.next() {
Ok(match &token { Ok(match &token {
Token::Identifier(v) => Expression::VariableName(v.clone()), Token::Identifier(v) => Expression::VariableName(v.clone()),
Token::DecimalValue(v) => Expression::ContantI32(v.parse().unwrap()), Token::DecimalValue(v) => Expression::Literal(Literal::I32(v.parse().unwrap())),
_ => Err(())?, // TODO: Add error raporting! _ => Err(())?, // TODO: Add error raporting!
}) })
} else { } else {
@ -66,9 +76,11 @@ fn parse_binop_rhs(
} }
} }
use BinaryOperator::*;
lhs = match &token { lhs = match &token {
Token::Plus => Expression::BinopAdd(Box::new(lhs), Box::new(rhs)), Token::Plus => Expression::Binop(Add, Box::new(lhs), Box::new(rhs)),
Token::Times => Expression::BinopMult(Box::new(lhs), Box::new(rhs)), Token::Times => Expression::Binop(Mult, Box::new(lhs), Box::new(rhs)),
_ => Err(())?, // TODO: Add error raporting! _ => Err(())?, // TODO: Add error raporting!
}; };
} }
@ -106,24 +118,17 @@ impl Parse for FunctionCallExpression {
#[derive(Debug)] #[derive(Debug)]
pub enum TopLevelStatement { pub enum TopLevelStatement {
Let(LetStatement),
Import(ImportStatement), Import(ImportStatement),
TLExpression(Expression), FunctionDefinition(FunctionDefinition),
} }
impl Parse for TopLevelStatement { impl Parse for TopLevelStatement {
fn parse(mut stream: TokenStream) -> Result<Self, ()> { fn parse(mut stream: TokenStream) -> Result<Self, ()> {
use TopLevelStatement as Stmt;
Ok(match stream.peek() { Ok(match stream.peek() {
Some(Token::LetKeyword) => TopLevelStatement::Let(stream.parse()?), Some(Token::ImportKeyword) => Stmt::Import(stream.parse()?),
Some(Token::ImportKeyword) => TopLevelStatement::Import(stream.parse()?), Some(Token::FnKeyword) => Stmt::FunctionDefinition(stream.parse()?),
_ => { _ => Err(())?, // TODO: Add error raporting!
if let Ok(e) = stream.parse() {
stream.expect(Token::Semicolon)?;
TopLevelStatement::TLExpression(e)
} else {
Err(())? // TODO: Add error raporting!
}
}
}) })
} }
} }
@ -139,7 +144,7 @@ impl Parse for LetStatement {
stream.expect(Token::Equals)?; stream.expect(Token::Equals)?;
let expression = stream.parse()?; let expression = stream.parse()?;
stream.expect(Token::Semicolon)?; stream.expect(Token::Semi)?;
Ok(LetStatement(variable, expression)) Ok(LetStatement(variable, expression))
} else { } else {
Err(()) // TODO: Add error raporting! Err(()) // TODO: Add error raporting!
@ -169,8 +174,87 @@ impl Parse for ImportStatement {
Err(())? // TODO: Add error raporting! Err(())? // TODO: Add error raporting!
} }
stream.expect(Token::Semicolon)?; stream.expect(Token::Semi)?;
Ok(ImportStatement(import_list)) Ok(ImportStatement(import_list))
} }
} }
/// A function definition: its signature followed by its body block.
#[derive(Debug)]
pub struct FunctionDefinition(FunctionSignature, Block);

impl Parse for FunctionDefinition {
    /// Parses the `fn` keyword, then a signature, then a block.
    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
        stream.expect(Token::FnKeyword)?;

        let signature = stream.parse()?;
        let body = stream.parse()?;
        Ok(FunctionDefinition(signature, body))
    }
}
/// The header of a function: currently only its name, since the parameter
/// list must be empty.
#[derive(Debug)]
pub struct FunctionSignature {
    name: String,
}

impl Parse for FunctionSignature {
    /// Parses an identifier followed by `(` and `)`.
    ///
    /// Fails when the next token is not an identifier or either parenthesis
    /// is missing.
    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
        match stream.next() {
            Some(Token::Identifier(name)) => {
                stream.expect(Token::ParenOpen)?;
                stream.expect(Token::ParenClose)?;
                Ok(FunctionSignature { name })
            }
            _ => Err(()), // TODO: Add error reporting!
        }
    }
}
/// A brace-delimited list of block-level statements.
#[derive(Debug)]
pub struct Block(Vec<BlockLevelStatement>);

impl Parse for Block {
    /// Parses `{`, then statements until a `}` is next or a `return`
    /// statement has been read, then the closing `}`.
    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
        stream.expect(Token::BraceOpen)?;

        let mut statements = Vec::new();
        loop {
            if matches!(stream.peek(), Some(Token::BraceClose)) {
                break;
            }

            let statement: BlockLevelStatement = stream.parse()?;
            // Return has to be the last statement, so stop right after it.
            let ends_block = matches!(statement, BlockLevelStatement::Return(_));
            statements.push(statement);
            if ends_block {
                break;
            }
        }

        stream.expect(Token::BraceClose)?;
        Ok(Block(statements))
    }
}
/// A statement that may appear inside a [`Block`].
#[derive(Debug)]
pub enum BlockLevelStatement {
    /// A `let` binding.
    Let(LetStatement),
    /// An `import` statement.
    Import(ImportStatement),
    /// A bare expression terminated by `;`.
    Expression(Expression),
    /// `return <expression>;`
    Return(Expression),
}

impl Parse for BlockLevelStatement {
    /// Dispatches on the next token: `let`, `import` and `return` select
    /// their statement kind; anything else is parsed as an expression
    /// followed by `;`.
    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
        let statement = match stream.peek() {
            Some(Token::LetKeyword) => Self::Let(stream.parse()?),
            Some(Token::ImportKeyword) => Self::Import(stream.parse()?),
            Some(Token::ReturnKeyword) => {
                // Step past the `return` keyword that was only peeked.
                stream.next();
                let returned = stream.parse()?;
                stream.expect(Token::Semi)?;
                Self::Return(returned)
            }
            _ => {
                // TODO: Add error reporting!
                let expression = stream.parse()?;
                stream.expect(Token::Semi)?;
                Self::Expression(expression)
            }
        };

        Ok(statement)
    }
}