From dca604a038ec322635590a46511ac2b7a9338ab8 Mon Sep 17 00:00:00 2001 From: sofia Date: Wed, 2 Aug 2023 15:31:33 +0300 Subject: [PATCH] Add preliminary codegen, update parsing to require functions --- .gitignore | 3 +- reid/easiest.reid | 10 ++- src/codegen.rs | 112 ++++++++++++++++++++++++++ src/lexer.rs | 197 ++++++++++++++++++++++++++-------------------- src/main.rs | 24 +++++- src/parser.rs | 124 ++++++++++++++++++++++++----- 6 files changed, 356 insertions(+), 114 deletions(-) create mode 100644 src/codegen.rs diff --git a/.gitignore b/.gitignore index 0f84cc9..cbdf84f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target -/.vscode \ No newline at end of file +/.vscode +.env \ No newline at end of file diff --git a/reid/easiest.reid b/reid/easiest.reid index 5cd961a..62fd5b9 100644 --- a/reid/easiest.reid +++ b/reid/easiest.reid @@ -1,4 +1,8 @@ // Hello, comment here! -let hello = 32; -let beep = - hello ; \ No newline at end of file + +fn main() { + let hello = 32; + let beep = + hello ; + return beep; +} \ No newline at end of file diff --git a/src/codegen.rs b/src/codegen.rs new file mode 100644 index 0000000..232532f --- /dev/null +++ b/src/codegen.rs @@ -0,0 +1,112 @@ +use std::mem; + +use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule}; + +use crate::parser::Literal; + +macro_rules! cstr { + ($string:expr) => { + core::ffi::CStr::from_bytes_with_nul_unchecked(concat!($string, "\0").as_bytes()).as_ptr() + }; +} + +#[derive(PartialEq, Eq)] +pub enum ValueType { + I32, +} + +impl ValueType { + unsafe fn get_llvm_type(&self, codegen: &mut CodeGenerator) -> LLVMTypeRef { + match *self { + Self::I32 => LLVMInt32TypeInContext(codegen.context), + } + } +} + +#[must_use = "value contains raw pointer and must be inserted somewhere"] +pub struct Value(ValueType, LLVMValueRef); + +pub struct CodeGenerator { + context: *mut LLVMContext, + module: *mut LLVMModule, + builder: *mut LLVMBuilder, +} + +impl CodeGenerator { + pub fn new() -> CodeGenerator { + unsafe { + // Set up a context, module and builder in that context. + let context = LLVMContextCreate(); + let module = LLVMModuleCreateWithNameInContext(cstr!("testmodule"), context); + let builder = LLVMCreateBuilderInContext(context); + + CodeGenerator { + context, + module, + builder, + } + } + } + + pub fn get_const(&mut self, literal_type: &Literal) -> Value { + unsafe { + match *literal_type { + Literal::I32(v) => Value( + ValueType::I32, + LLVMConstInt( + LLVMInt32TypeInContext(self.context), + mem::transmute(v as i64), + 1, + ), + ), + } + } + } + + pub fn add(&mut self, lhs: Value, rhs: Value) -> Result { + unsafe { + if lhs.0 == rhs.0 { + Ok(Value( + lhs.0, + LLVMBuildAdd(self.builder, lhs.1, rhs.1, cstr!("tmpadd")), + )) + } else { + Err(()) + } + } + } + + pub fn create_func(&mut self, ret: Value) { + unsafe { + let mut argts = []; + let func_type = LLVMFunctionType( + ret.0.get_llvm_type(self), + argts.as_mut_ptr(), + argts.len() as u32, + 0, + ); + + let anon_func = LLVMAddFunction(self.module, cstr!("_anon_func"), func_type); + + // Create a basic block in the function and set our builder to generate + // code in it. + let bb = LLVMAppendBasicBlockInContext(self.context, anon_func, cstr!("entry")); + LLVMPositionBuilderAtEnd(self.builder, bb); + + // Emit a `ret i64` into the function to return the computed sum. + LLVMBuildRet(self.builder, ret.1); + } + } +} + +impl Drop for CodeGenerator { + fn drop(&mut self) { + // Clean up. Values created in the context mostly get cleaned up there. + unsafe { + LLVMDisposeBuilder(self.builder); + LLVMDumpModule(self.module); + LLVMDisposeModule(self.module); + LLVMContextDispose(self.context); + } + } +} diff --git a/src/lexer.rs b/src/lexer.rs index 72b8b68..7199c73 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -2,31 +2,123 @@ use std::{fmt::Debug, iter::Peekable, str::Chars}; static DECIMAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']; +#[derive(Debug, Eq, PartialEq, Clone)] +pub enum Token { + // Values + Identifier(String), + /// Number with at most one decimal point + DecimalValue(String), + + // Keywords + /// `let` + LetKeyword, + /// `import` + ImportKeyword, + /// `return` + ReturnKeyword, + /// `fn` + FnKeyword, + + // Symbols + /// `;` + Semi, + /// `=` + Equals, + /// `:` + Colon, + /// `+` + Plus, + /// `*` + Times, + /// `(` + ParenOpen, + /// `)` + ParenClose, + /// `{` + BraceOpen, + /// `}` + BraceClose, + /// `,` + Comma, + + Eof, +} + +impl Token { + pub fn get_token_prec(&self) -> i8 { + match &self { + Token::Plus => 10, + Token::Times => 20, + _ => -1, + } + } +} + +#[derive(Clone)] +pub struct FullToken { + pub token: Token, + pub position: Position, +} + +impl Debug for FullToken { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!( + "{:?} (Ln {}, Col {})", + self.token, self.position.1, self.position.0 + )) + } +} + +pub type Position = (u32, u32); + +const EOF_CHAR: char = '\0'; + +pub struct Cursor<'a> { + pub position: Position, + char_stream: Chars<'a>, +} + +impl<'a> Cursor<'a> { + fn next(&mut self) -> Option { + let next = self.char_stream.next(); + self.position.0 += 1; + if let Some('\n') = next { + self.position.1 += 1; + self.position.0 = 0; + } + next + } + + fn first(&mut self) -> Option { + // `.next()` optimizes better than `.nth(0)` + self.char_stream.clone().next() + } + + fn second(&mut self) -> Option { + // `.next()` optimizes better than `.nth(1)` + let mut stream = self.char_stream.clone(); + stream.next(); + stream.next() + } +} + pub fn tokenize>(to_tokenize: T) -> Result, String> { let to_tokenize = to_tokenize.into(); let mut position = (0, 1); let mut cursor = Cursor { - char_stream: to_tokenize.chars().peekable(), + char_stream: to_tokenize.chars(), position, }; let mut tokens = Vec::new(); while let Some(character) = &cursor.next() { - position.0 += 1; - if *character == '\n' { - position.1 += 1; - position.0 = 0; - } - - let peek = cursor.peek(); - let variant = match character { // Whitespace w if w.is_whitespace() => continue, // Comments - '/' if peek == Some(&'/') => { - while !matches!(&cursor.peek(), Some('\n')) { + '/' if cursor.first() == Some('/') => { + while !matches!(cursor.first(), Some('\n')) { cursor.next(); } continue; @@ -34,7 +126,7 @@ pub fn tokenize>(to_tokenize: T) -> Result, Strin // "words" c if c.is_alphabetic() => { let mut value = character.to_string(); - while let Some(c) = &cursor.peek() { + while let Some(c) = cursor.first() { if !c.is_ascii_alphanumeric() { break; } @@ -46,6 +138,8 @@ pub fn tokenize>(to_tokenize: T) -> Result, Strin let variant = match value.as_str() { "let" => Token::LetKeyword, "import" => Token::ImportKeyword, + "return" => Token::ReturnKeyword, + "fn" => Token::FnKeyword, _ => Token::Identifier(value), }; variant @@ -53,8 +147,8 @@ pub fn tokenize>(to_tokenize: T) -> Result, Strin // Decimals c if DECIMAL_NUMERICS.contains(c) => { let mut value = character.to_string(); - while let Some(c) = &cursor.peek() { - if !DECIMAL_NUMERICS.contains(c) { + while let Some(c) = cursor.first() { + if !DECIMAL_NUMERICS.contains(&c) { break; } value += &c.to_string(); @@ -64,12 +158,14 @@ pub fn tokenize>(to_tokenize: T) -> Result, Strin } // Single character tokens '=' => Token::Equals, - ';' => Token::Semicolon, + ';' => Token::Semi, ':' => Token::Colon, '+' => Token::Plus, '*' => Token::Times, '(' => Token::ParenOpen, ')' => Token::ParenClose, + '{' => Token::BraceOpen, + '}' => Token::BraceClose, ',' => Token::Comma, // Invalid token _ => Err(format!( @@ -93,74 +189,3 @@ pub fn tokenize>(to_tokenize: T) -> Result, Strin Ok(tokens) } - -#[derive(Debug, Eq, PartialEq, Clone)] -pub enum Token { - // Values - Identifier(String), - /// Number with at most one decimal point - DecimalValue(String), - - // Keywords - LetKeyword, - ImportKeyword, - - // Symbols - Semicolon, - Equals, - Colon, - Plus, - Times, - ParenOpen, // ( - ParenClose, // ) - Comma, - - Eof, -} - -impl Token { - pub fn get_token_prec(&self) -> i8 { - match &self { - Token::Plus => 10, - Token::Times => 20, - _ => -1, - } - } -} - -pub struct FullToken { - pub token: Token, - position: Position, -} - -impl Debug for FullToken { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( - "{:?} (Ln {}, Col {})", - self.token, self.position.1, self.position.0 - )) - } -} - -pub type Position = (u32, u32); - -pub struct Cursor<'a> { - pub position: Position, - char_stream: Peekable>, -} - -impl<'a> Cursor<'a> { - fn next(&mut self) -> Option { - let next = self.char_stream.next(); - self.position.0 += 1; - if let Some('\n') = next { - self.position.1 += 1; - self.position.0 = 0; - } - next - } - - fn peek(&mut self) -> Option<&char> { - self.char_stream.peek() - } -} diff --git a/src/main.rs b/src/main.rs index 744da99..fbf164c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,24 +1,40 @@ -use crate::{lexer::Token, parser::TopLevelStatement, token_stream::TokenStream}; +use crate::{ + codegen::CodeGenerator, lexer::Token, parser::TopLevelStatement, token_stream::TokenStream, +}; pub static EASIEST: &str = include_str!("../reid/easiest.reid"); pub static EASY: &str = include_str!("../reid/easy.reid"); pub static MEDIUM: &str = include_str!("../reid/medium.reid"); pub static HARD: &str = include_str!("../reid/hard.reid"); +mod codegen; mod lexer; mod parser; mod token_stream; +// TODO: +// 1. Make it so that TopLevelStatement can only be import or function def +// 2. Make BlockLevelStatement, that has everything TopLevelStatement has now +// 3. Make it so all codegen is done with a Block-struct, that represents a +// single proper block + fn main() { - let tokens = lexer::tokenize(EASY).unwrap(); + let tokens = lexer::tokenize(EASIEST).unwrap(); dbg!(&tokens); let mut token_stream = TokenStream::from(&tokens); - while let Ok(statement) = token_stream.parse::() { + while !matches!(token_stream.peek().unwrap_or(Token::Eof), Token::Eof) { + let statement = token_stream.parse::().unwrap(); dbg!(&statement); } - dbg!(token_stream.expect(Token::Eof).ok()); + let mut c = CodeGenerator::new(); + let x = c.get_const(&parser::Literal::I32(3)); + let y = c.get_const(&parser::Literal::I32(4)); + let add = c.add(x, y).unwrap(); + c.create_func(add); + + // dbg!(token_stream.expect(Token::Eof).ok()); } diff --git a/src/parser.rs b/src/parser.rs index f115f26..10d0c7d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -7,12 +7,22 @@ where fn parse(stream: TokenStream) -> Result; } +#[derive(Debug, Clone)] +pub enum Literal { + I32(i32), +} + +#[derive(Debug, Clone)] +pub enum BinaryOperator { + Add, + Mult, +} + #[derive(Debug, Clone)] pub enum Expression { VariableName(String), - ContantI32(i32), - BinopAdd(Box, Box), - BinopMult(Box, Box), + Literal(Literal), + Binop(BinaryOperator, Box, Box), FunctionCall(Box), } @@ -29,7 +39,7 @@ fn parse_primary_expression(stream: &mut TokenStream) -> Result } else if let Some(token) = stream.next() { Ok(match &token { Token::Identifier(v) => Expression::VariableName(v.clone()), - Token::DecimalValue(v) => Expression::ContantI32(v.parse().unwrap()), + Token::DecimalValue(v) => Expression::Literal(Literal::I32(v.parse().unwrap())), _ => Err(())?, // TODO: Add error raporting! }) } else { @@ -66,9 +76,11 @@ fn parse_binop_rhs( } } + use BinaryOperator::*; + lhs = match &token { - Token::Plus => Expression::BinopAdd(Box::new(lhs), Box::new(rhs)), - Token::Times => Expression::BinopMult(Box::new(lhs), Box::new(rhs)), + Token::Plus => Expression::Binop(Add, Box::new(lhs), Box::new(rhs)), + Token::Times => Expression::Binop(Mult, Box::new(lhs), Box::new(rhs)), _ => Err(())?, // TODO: Add error raporting! }; } @@ -106,24 +118,17 @@ impl Parse for FunctionCallExpression { #[derive(Debug)] pub enum TopLevelStatement { - Let(LetStatement), Import(ImportStatement), - TLExpression(Expression), + FunctionDefinition(FunctionDefinition), } impl Parse for TopLevelStatement { fn parse(mut stream: TokenStream) -> Result { + use TopLevelStatement as Stmt; Ok(match stream.peek() { - Some(Token::LetKeyword) => TopLevelStatement::Let(stream.parse()?), - Some(Token::ImportKeyword) => TopLevelStatement::Import(stream.parse()?), - _ => { - if let Ok(e) = stream.parse() { - stream.expect(Token::Semicolon)?; - TopLevelStatement::TLExpression(e) - } else { - Err(())? // TODO: Add error raporting! - } - } + Some(Token::ImportKeyword) => Stmt::Import(stream.parse()?), + Some(Token::FnKeyword) => Stmt::FunctionDefinition(stream.parse()?), + _ => Err(())?, // TODO: Add error raporting! }) } } @@ -139,7 +144,7 @@ impl Parse for LetStatement { stream.expect(Token::Equals)?; let expression = stream.parse()?; - stream.expect(Token::Semicolon)?; + stream.expect(Token::Semi)?; Ok(LetStatement(variable, expression)) } else { Err(()) // TODO: Add error raporting! @@ -169,8 +174,87 @@ impl Parse for ImportStatement { Err(())? // TODO: Add error raporting! } - stream.expect(Token::Semicolon)?; + stream.expect(Token::Semi)?; Ok(ImportStatement(import_list)) } } + +#[derive(Debug)] +pub struct FunctionDefinition(FunctionSignature, Block); + +impl Parse for FunctionDefinition { + fn parse(mut stream: TokenStream) -> Result { + stream.expect(Token::FnKeyword)?; + Ok(FunctionDefinition(stream.parse()?, stream.parse()?)) + } +} + +#[derive(Debug)] +pub struct FunctionSignature { + name: String, +} + +impl Parse for FunctionSignature { + fn parse(mut stream: TokenStream) -> Result { + if let Some(Token::Identifier(name)) = stream.next() { + stream.expect(Token::ParenOpen)?; + stream.expect(Token::ParenClose)?; + Ok(FunctionSignature { name }) + } else { + Err(()) // TODO: Add error raporting! + } + } +} + +#[derive(Debug)] +pub struct Block(Vec); + +impl Parse for Block { + fn parse(mut stream: TokenStream) -> Result { + let mut statements = Vec::new(); + stream.expect(Token::BraceOpen)?; + while !matches!(stream.peek(), Some(Token::BraceClose)) { + let statement = stream.parse()?; + if let BlockLevelStatement::Return(_) = &statement { + statements.push(statement); + break; // Return has to be the last statement + } + statements.push(statement); + } + stream.expect(Token::BraceClose)?; + Ok(Block(statements)) + } +} + +#[derive(Debug)] +pub enum BlockLevelStatement { + Let(LetStatement), + Import(ImportStatement), + Expression(Expression), + Return(Expression), +} + +impl Parse for BlockLevelStatement { + fn parse(mut stream: TokenStream) -> Result { + use BlockLevelStatement as Stmt; + Ok(match stream.peek() { + Some(Token::LetKeyword) => Stmt::Let(stream.parse()?), + Some(Token::ImportKeyword) => Stmt::Import(stream.parse()?), + Some(Token::ReturnKeyword) => { + stream.next(); + let exp = stream.parse()?; + stream.expect(Token::Semi)?; + Stmt::Return(exp) + } + _ => { + if let Ok(e) = stream.parse() { + stream.expect(Token::Semi)?; + Stmt::Expression(e) + } else { + Err(())? // TODO: Add error raporting! + } + } + }) + } +}