From dca604a038ec322635590a46511ac2b7a9338ab8 Mon Sep 17 00:00:00 2001
From: sofia <sofia@teascade.net>
Date: Wed, 2 Aug 2023 15:31:33 +0300
Subject: [PATCH] Add preliminary codegen, update parsing to require functions

---
 .gitignore        |   3 +-
 reid/easiest.reid |  10 ++-
 src/codegen.rs    | 112 ++++++++++++++++++++++++++
 src/lexer.rs      | 197 ++++++++++++++++++++++++++--------------------
 src/main.rs       |  24 +++++-
 src/parser.rs     | 124 ++++++++++++++++++++++++-----
 6 files changed, 356 insertions(+), 114 deletions(-)
 create mode 100644 src/codegen.rs

diff --git a/.gitignore b/.gitignore
index 0f84cc9..cbdf84f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 /target
-/.vscode
\ No newline at end of file
+/.vscode
+.env
\ No newline at end of file
diff --git a/reid/easiest.reid b/reid/easiest.reid
index 5cd961a..62fd5b9 100644
--- a/reid/easiest.reid
+++ b/reid/easiest.reid
@@ -1,4 +1,8 @@
 // Hello, comment here!
-let hello = 32;
-let beep = 
-    hello ;
\ No newline at end of file
+
+fn main() {
+    let hello = 32;
+    let beep = 
+        hello ;
+    return beep;
+}
\ No newline at end of file
diff --git a/src/codegen.rs b/src/codegen.rs
new file mode 100644
index 0000000..232532f
--- /dev/null
+++ b/src/codegen.rs
@@ -0,0 +1,112 @@
+use std::mem;
+
+use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule};
+
+use crate::parser::Literal;
+
+macro_rules! cstr { // Pointer to `$string` with a trailing NUL appended through `concat!`.
+    ($string:expr) => {
+        core::ffi::CStr::from_bytes_with_nul_unchecked(concat!($string, "\0").as_bytes()).as_ptr()
+    }; // NOTE: expands to a call of an `unsafe fn`, so every use site must sit inside `unsafe`.
+}
+
+#[derive(PartialEq, Eq)]
+pub enum ValueType { // Type tag stored in `Value` next to the raw LLVM value.
+    I32, // mapped to LLVM's 32-bit integer type by `get_llvm_type`
+}
+
+impl ValueType { // Maps the type tag to the LLVM type used during code generation.
+    unsafe fn get_llvm_type(&self, codegen: &mut CodeGenerator) -> LLVMTypeRef {
+        match *self { // no wildcard arm, so adding a variant forces an update here
+            Self::I32 => LLVMInt32TypeInContext(codegen.context), // i32 in `codegen`'s context
+        }
+    }
+}
+
+#[must_use = "value contains raw pointer and must be inserted somewhere"]
+pub struct Value(ValueType, LLVMValueRef); // (type tag, raw LLVM value handle)
+
+pub struct CodeGenerator { // Owns the raw LLVM handles: `new` creates them, `Drop` disposes them.
+    context: *mut LLVMContext, // returned by LLVMContextCreate
+    module: *mut LLVMModule, // module "testmodule", created inside `context`
+    builder: *mut LLVMBuilder, // instruction builder created inside `context`
+}
+
+impl CodeGenerator {
+    /// Creates an LLVM context together with one module and one builder inside it.
+    pub fn new() -> CodeGenerator {
+        // SAFETY: plain LLVM constructor calls; the name is a NUL-terminated literal.
+        unsafe {
+            // Set up a context, module and builder in that context.
+            let context = LLVMContextCreate();
+            let module = LLVMModuleCreateWithNameInContext(cstr!("testmodule"), context);
+            let builder = LLVMCreateBuilderInContext(context);
+
+            CodeGenerator {
+                context,
+                module,
+                builder,
+            }
+        }
+    }
+
+    /// Builds the LLVM constant for `literal_type`, tagged with the matching `ValueType`.
+    pub fn get_const(&mut self, literal_type: &Literal) -> Value {
+        match *literal_type {
+            Literal::I32(v) => {
+                // Widen losslessly, then reinterpret the bit pattern as unsigned; a plain
+                // cast expresses this without reaching for `mem::transmute`.
+                let bits = i64::from(v) as u64;
+                // SAFETY: `self.context` is the live context from `new`; `1` = sign-extend.
+                let raw = unsafe { LLVMConstInt(LLVMInt32TypeInContext(self.context), bits, 1) };
+                Value(ValueType::I32, raw)
+            }
+        }
+    }
+
+    /// Emits `lhs + rhs` through the builder, or `Err(())` when the operand types differ.
+    pub fn add(&mut self, lhs: Value, rhs: Value) -> Result<Value, ()> {
+        if lhs.0 != rhs.0 {
+            return Err(());
+        }
+        // SAFETY: `Value` fields are private, so both handles came from LLVM calls here.
+        let sum = unsafe { LLVMBuildAdd(self.builder, lhs.1, rhs.1, cstr!("tmpadd")) };
+        Ok(Value(lhs.0, sum))
+    }
+
+    /// Adds the zero-argument function `_anon_func` to the module; its body returns `ret`.
+    pub fn create_func(&mut self, ret: Value) {
+        // SAFETY: context, module and builder were created together in `new`.
+        unsafe {
+            let mut argts = [];
+            let func_type = LLVMFunctionType(
+                ret.0.get_llvm_type(self),
+                argts.as_mut_ptr(),
+                argts.len() as u32,
+                0, // not variadic
+            );
+
+            let anon_func = LLVMAddFunction(self.module, cstr!("_anon_func"), func_type);
+
+            // Create a basic block in the function and set our builder to generate
+            // code in it.
+            let bb = LLVMAppendBasicBlockInContext(self.context, anon_func, cstr!("entry"));
+            LLVMPositionBuilderAtEnd(self.builder, bb);
+
+            // Emit `ret <value>`; the function's return type was derived from `ret` above.
+            LLVMBuildRet(self.builder, ret.1);
+        }
+    }
+}
+
+impl Drop for CodeGenerator {
+    fn drop(&mut self) {
+        // Clean up. Values created in the context mostly get cleaned up there.
+        unsafe {
+            LLVMDisposeBuilder(self.builder);
+            LLVMDumpModule(self.module); // debug aid: dumps the module's IR before it is freed
+            LLVMDisposeModule(self.module);
+            LLVMContextDispose(self.context); // last: builder and module were created in it
+        }
+    }
+}
diff --git a/src/lexer.rs b/src/lexer.rs
index 72b8b68..7199c73 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -2,31 +2,123 @@ use std::{fmt::Debug, iter::Peekable, str::Chars};
 
 static DECIMAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
 
+#[derive(Debug, Eq, PartialEq, Clone)]
+pub enum Token { // Token kinds carried by `FullToken`, as produced by `tokenize`.
+    // Values
+    Identifier(String),
+    /// Number with at most one decimal point
+    DecimalValue(String),
+
+    // Keywords
+    /// `let`
+    LetKeyword,
+    /// `import`
+    ImportKeyword,
+    /// `return`
+    ReturnKeyword,
+    /// `fn`
+    FnKeyword,
+
+    // Symbols
+    /// `;`
+    Semi,
+    /// `=`
+    Equals,
+    /// `:`
+    Colon,
+    /// `+`
+    Plus,
+    /// `*`
+    Times,
+    /// `(`
+    ParenOpen,
+    /// `)`
+    ParenClose,
+    /// `{`
+    BraceOpen,
+    /// `}`
+    BraceClose,
+    /// `,`
+    Comma,
+
+    Eof, // end-of-input marker; `main` stops parsing once it peeks this
+}
+
+impl Token {
+    pub fn get_token_prec(&self) -> i8 { // precedence; presumably read by `parse_binop_rhs`
+        match &self {
+            Token::Plus => 10,
+            Token::Times => 20, // larger than `Plus`, i.e. `*` outranks `+`
+            _ => -1, // sentinel for every token that is not a binary operator
+        }
+    }
+}
+
+#[derive(Clone)]
+pub struct FullToken { // A token paired with a source position.
+    pub token: Token, // the token itself
+    pub position: Position, // (column, line), as read by the `Debug` impl
+}
+
+impl Debug for FullToken {
+    /// Renders as `<token> (Ln <line>, Col <column>)`.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // `position` is stored as (column, line), hence the swapped order below.
+        let (column, line) = self.position;
+        write!(f, "{:?} (Ln {}, Col {})", self.token, line, column)
+    }
+}
+
+pub type Position = (u32, u32); // (column, line): `Cursor` bumps .0 per char, .1 per '\n'
+
+const EOF_CHAR: char = '\0';
+
+pub struct Cursor<'a> { // Character reader over the source text that tracks its position.
+    pub position: Position, // (column, line), updated by `next`
+    char_stream: Chars<'a>, // remaining input; cloned by `first`/`second` to peek
+}
+
+impl<'a> Cursor<'a> {
+    /// Consumes one character: column +1, or next line with column 0 after a '\n'.
+    fn next(&mut self) -> Option<char> {
+        let consumed = self.char_stream.next();
+        self.position.0 += 1;
+        if consumed == Some('\n') {
+            self.position = (0, self.position.1 + 1);
+        }
+        consumed
+    }
+
+    /// Peeks at the upcoming character without consuming it.
+    fn first(&mut self) -> Option<char> {
+        self.char_stream.clone().next()
+    }
+
+    /// Peeks one character past `first`, again without consuming anything.
+    fn second(&mut self) -> Option<char> {
+        let mut lookahead = self.char_stream.clone();
+        lookahead.next();
+        lookahead.next()
+    }
+}
+
 pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, String> {
     let to_tokenize = to_tokenize.into();
     let mut position = (0, 1);
     let mut cursor = Cursor {
-        char_stream: to_tokenize.chars().peekable(),
+        char_stream: to_tokenize.chars(),
         position,
     };
 
     let mut tokens = Vec::new();
 
     while let Some(character) = &cursor.next() {
-        position.0 += 1;
-        if *character == '\n' {
-            position.1 += 1;
-            position.0 = 0;
-        }
-
-        let peek = cursor.peek();
-
         let variant = match character {
             // Whitespace
             w if w.is_whitespace() => continue,
             // Comments
-            '/' if peek == Some(&'/') => {
-                while !matches!(&cursor.peek(), Some('\n')) {
+            '/' if cursor.first() == Some('/') => {
+                while !matches!(cursor.first(), Some('\n')) {
                     cursor.next();
                 }
                 continue;
@@ -34,7 +126,7 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
             // "words"
             c if c.is_alphabetic() => {
                 let mut value = character.to_string();
-                while let Some(c) = &cursor.peek() {
+                while let Some(c) = cursor.first() {
                     if !c.is_ascii_alphanumeric() {
                         break;
                     }
@@ -46,6 +138,8 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
                 let variant = match value.as_str() {
                     "let" => Token::LetKeyword,
                     "import" => Token::ImportKeyword,
+                    "return" => Token::ReturnKeyword,
+                    "fn" => Token::FnKeyword,
                     _ => Token::Identifier(value),
                 };
                 variant
@@ -53,8 +147,8 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
             // Decimals
             c if DECIMAL_NUMERICS.contains(c) => {
                 let mut value = character.to_string();
-                while let Some(c) = &cursor.peek() {
-                    if !DECIMAL_NUMERICS.contains(c) {
+                while let Some(c) = cursor.first() {
+                    if !DECIMAL_NUMERICS.contains(&c) {
                         break;
                     }
                     value += &c.to_string();
@@ -64,12 +158,14 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
             }
             // Single character tokens
             '=' => Token::Equals,
-            ';' => Token::Semicolon,
+            ';' => Token::Semi,
             ':' => Token::Colon,
             '+' => Token::Plus,
             '*' => Token::Times,
             '(' => Token::ParenOpen,
             ')' => Token::ParenClose,
+            '{' => Token::BraceOpen,
+            '}' => Token::BraceClose,
             ',' => Token::Comma,
             // Invalid token
             _ => Err(format!(
@@ -93,74 +189,3 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
 
     Ok(tokens)
 }
-
-#[derive(Debug, Eq, PartialEq, Clone)]
-pub enum Token {
-    // Values
-    Identifier(String),
-    /// Number with at most one decimal point
-    DecimalValue(String),
-
-    // Keywords
-    LetKeyword,
-    ImportKeyword,
-
-    // Symbols
-    Semicolon,
-    Equals,
-    Colon,
-    Plus,
-    Times,
-    ParenOpen,  // (
-    ParenClose, // )
-    Comma,
-
-    Eof,
-}
-
-impl Token {
-    pub fn get_token_prec(&self) -> i8 {
-        match &self {
-            Token::Plus => 10,
-            Token::Times => 20,
-            _ => -1,
-        }
-    }
-}
-
-pub struct FullToken {
-    pub token: Token,
-    position: Position,
-}
-
-impl Debug for FullToken {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_fmt(format_args!(
-            "{:?} (Ln {}, Col {})",
-            self.token, self.position.1, self.position.0
-        ))
-    }
-}
-
-pub type Position = (u32, u32);
-
-pub struct Cursor<'a> {
-    pub position: Position,
-    char_stream: Peekable<Chars<'a>>,
-}
-
-impl<'a> Cursor<'a> {
-    fn next(&mut self) -> Option<char> {
-        let next = self.char_stream.next();
-        self.position.0 += 1;
-        if let Some('\n') = next {
-            self.position.1 += 1;
-            self.position.0 = 0;
-        }
-        next
-    }
-
-    fn peek(&mut self) -> Option<&char> {
-        self.char_stream.peek()
-    }
-}
diff --git a/src/main.rs b/src/main.rs
index 744da99..fbf164c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,24 +1,40 @@
-use crate::{lexer::Token, parser::TopLevelStatement, token_stream::TokenStream};
+use crate::{
+    codegen::CodeGenerator, lexer::Token, parser::TopLevelStatement, token_stream::TokenStream,
+};
 
 pub static EASIEST: &str = include_str!("../reid/easiest.reid");
 pub static EASY: &str = include_str!("../reid/easy.reid");
 pub static MEDIUM: &str = include_str!("../reid/medium.reid");
 pub static HARD: &str = include_str!("../reid/hard.reid");
 
+mod codegen;
 mod lexer;
 mod parser;
 mod token_stream;
 
+// TODO:
+// 1. Make it so that TopLevelStatement can only be import or function def
+// 2. Make BlockLevelStatement, that has everything TopLevelStatement has now
+// 3. Make it so all codegen is done with a Block-struct, that represents a
+//    single proper block
+
 fn main() {
-    let tokens = lexer::tokenize(EASY).unwrap();
+    let tokens = lexer::tokenize(EASIEST).unwrap();
 
     dbg!(&tokens);
 
     let mut token_stream = TokenStream::from(&tokens);
 
-    while let Ok(statement) = token_stream.parse::<TopLevelStatement>() {
+    while !matches!(token_stream.peek().unwrap_or(Token::Eof), Token::Eof) {
+        let statement = token_stream.parse::<TopLevelStatement>().unwrap();
         dbg!(&statement);
     }
 
-    dbg!(token_stream.expect(Token::Eof).ok());
+    let mut c = CodeGenerator::new();
+    let x = c.get_const(&parser::Literal::I32(3));
+    let y = c.get_const(&parser::Literal::I32(4));
+    let add = c.add(x, y).unwrap();
+    c.create_func(add);
+
+    // dbg!(token_stream.expect(Token::Eof).ok());
 }
diff --git a/src/parser.rs b/src/parser.rs
index f115f26..10d0c7d 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -7,12 +7,22 @@ where
     fn parse(stream: TokenStream) -> Result<Self, ()>;
 }
 
+#[derive(Debug, Clone)]
+pub enum Literal { // Literal values that can appear in an expression.
+    I32(i32), // parsed from `Token::DecimalValue` in `parse_primary_expression`
+}
+
+#[derive(Debug, Clone)]
+pub enum BinaryOperator { // Operator tag carried by `Expression::Binop`.
+    Add, // built for `Token::Plus`
+    Mult, // built for `Token::Times`
+}
+
 #[derive(Debug, Clone)]
 pub enum Expression {
     VariableName(String),
-    ContantI32(i32),
-    BinopAdd(Box<Expression>, Box<Expression>),
-    BinopMult(Box<Expression>, Box<Expression>),
+    Literal(Literal),
+    Binop(BinaryOperator, Box<Expression>, Box<Expression>),
     FunctionCall(Box<FunctionCallExpression>),
 }
 
@@ -29,7 +39,7 @@ fn parse_primary_expression(stream: &mut TokenStream) -> Result<Expression, ()>
     } else if let Some(token) = stream.next() {
         Ok(match &token {
             Token::Identifier(v) => Expression::VariableName(v.clone()),
-            Token::DecimalValue(v) => Expression::ContantI32(v.parse().unwrap()),
+            Token::DecimalValue(v) => Expression::Literal(Literal::I32(v.parse().unwrap())),
             _ => Err(())?, // TODO: Add error raporting!
         })
     } else {
@@ -66,9 +76,11 @@ fn parse_binop_rhs(
                 }
             }
 
+            use BinaryOperator::*;
+
             lhs = match &token {
-                Token::Plus => Expression::BinopAdd(Box::new(lhs), Box::new(rhs)),
-                Token::Times => Expression::BinopMult(Box::new(lhs), Box::new(rhs)),
+                Token::Plus => Expression::Binop(Add, Box::new(lhs), Box::new(rhs)),
+                Token::Times => Expression::Binop(Mult, Box::new(lhs), Box::new(rhs)),
                 _ => Err(())?, // TODO: Add error raporting!
             };
         }
@@ -106,24 +118,17 @@ impl Parse for FunctionCallExpression {
 
 #[derive(Debug)]
 pub enum TopLevelStatement {
-    Let(LetStatement),
     Import(ImportStatement),
-    TLExpression(Expression),
+    FunctionDefinition(FunctionDefinition),
 }
 
 impl Parse for TopLevelStatement {
     fn parse(mut stream: TokenStream) -> Result<Self, ()> {
+        use TopLevelStatement as Stmt;
         Ok(match stream.peek() {
-            Some(Token::LetKeyword) => TopLevelStatement::Let(stream.parse()?),
-            Some(Token::ImportKeyword) => TopLevelStatement::Import(stream.parse()?),
-            _ => {
-                if let Ok(e) = stream.parse() {
-                    stream.expect(Token::Semicolon)?;
-                    TopLevelStatement::TLExpression(e)
-                } else {
-                    Err(())? // TODO: Add error raporting!
-                }
-            }
+            Some(Token::ImportKeyword) => Stmt::Import(stream.parse()?),
+            Some(Token::FnKeyword) => Stmt::FunctionDefinition(stream.parse()?),
+            _ => Err(())?, // TODO: Add error raporting!
         })
     }
 }
@@ -139,7 +144,7 @@ impl Parse for LetStatement {
             stream.expect(Token::Equals)?;
 
             let expression = stream.parse()?;
-            stream.expect(Token::Semicolon)?;
+            stream.expect(Token::Semi)?;
             Ok(LetStatement(variable, expression))
         } else {
             Err(()) // TODO: Add error raporting!
@@ -169,8 +174,87 @@ impl Parse for ImportStatement {
             Err(())? // TODO: Add error raporting!
         }
 
-        stream.expect(Token::Semicolon)?;
+        stream.expect(Token::Semi)?;
 
         Ok(ImportStatement(import_list))
     }
 }
+
+#[derive(Debug)]
+pub struct FunctionDefinition(FunctionSignature, Block); // (signature, body)
+
+impl Parse for FunctionDefinition {
+    fn parse(mut stream: TokenStream) -> Result<Self, ()> { // grammar: `fn` <signature> <block>
+        stream.expect(Token::FnKeyword)?;
+        Ok(FunctionDefinition(stream.parse()?, stream.parse()?)) // signature first, then body
+    }
+}
+
+#[derive(Debug)]
+pub struct FunctionSignature { // Only the name so far; no parameters are parsed yet.
+    name: String, // identifier that follows the `fn` keyword
+}
+
+impl Parse for FunctionSignature {
+    fn parse(mut stream: TokenStream) -> Result<Self, ()> { // grammar: <identifier> `(` `)`
+        if let Some(Token::Identifier(name)) = stream.next() {
+            stream.expect(Token::ParenOpen)?; // the parameter list must be empty for now
+            stream.expect(Token::ParenClose)?;
+            Ok(FunctionSignature { name })
+        } else {
+            Err(()) // TODO: Add error reporting!
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct Block(Vec<BlockLevelStatement>); // statements in the order they were parsed
+
+impl Parse for Block {
+    /// Parses `{ <statement>* }`. A `return` statement ends the statement list, so the
+    /// closing brace must follow it directly.
+    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
+        let mut statements = Vec::new();
+        stream.expect(Token::BraceOpen)?;
+        while !matches!(stream.peek(), Some(Token::BraceClose)) {
+            statements.push(stream.parse()?);
+            if let Some(BlockLevelStatement::Return(_)) = statements.last() {
+                break; // Return has to be the last statement
+            }
+        }
+        stream.expect(Token::BraceClose)?;
+        Ok(Block(statements))
+    }
+}
+
+#[derive(Debug)]
+pub enum BlockLevelStatement { // A statement allowed inside a `Block`.
+    Let(LetStatement),
+    Import(ImportStatement),
+    Expression(Expression), // bare expression terminated by `;`
+    Return(Expression), // `return <expr>;`; `Block::parse` stops after reading one
+}
+
+impl Parse for BlockLevelStatement {
+    /// Dispatches on the next token: `let` and `import` go to their own parsers,
+    /// `return <expr>;` yields `Return`, and anything else has to be `<expr>;`.
+    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
+        use BlockLevelStatement as Stmt;
+        Ok(match stream.peek() {
+            Some(Token::LetKeyword) => Stmt::Let(stream.parse()?),
+            Some(Token::ImportKeyword) => Stmt::Import(stream.parse()?),
+            Some(Token::ReturnKeyword) => {
+                stream.next(); // consume the `return` keyword itself
+                let value = stream.parse()?;
+                stream.expect(Token::Semi)?;
+                Stmt::Return(value)
+            }
+            _ => {
+                // A failed expression parse propagates as `Err(())`. TODO: Add error reporting!
+                let expression = stream.parse()?;
+                stream.expect(Token::Semi)?;
+                Stmt::Expression(expression)
+            }
+        })
+    }
+}