Add preliminary codegen, update parsing to require functions

This commit is contained in:
Sofia 2023-08-02 15:31:33 +03:00
parent 17ab6a0cea
commit dca604a038
6 changed files with 356 additions and 114 deletions

3
.gitignore vendored
View File

@ -1,2 +1,3 @@
/target
/.vscode
/.vscode
.env

View File

@ -1,4 +1,8 @@
// Hello, comment here!
let hello = 32;
let beep =
hello ;
fn main() {
let hello = 32;
let beep =
hello ;
return beep;
}

112
src/codegen.rs Normal file
View File

@ -0,0 +1,112 @@
use std::mem;
use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule};
use crate::parser::Literal;
// Builds a `*const c_char` pointing at a NUL-terminated copy of the given
// string literal, for passing to LLVM's C API.
// NOTE(review): expands to an `unsafe` call (`from_bytes_with_nul_unchecked`),
// so it can only appear inside an `unsafe` block. Sound for literals because
// the macro itself appends the trailing "\0".
macro_rules! cstr {
    ($string:expr) => {
        core::ffi::CStr::from_bytes_with_nul_unchecked(concat!($string, "\0").as_bytes()).as_ptr()
    };
}
/// Type tag carried alongside every generated [`Value`]; compared in
/// `CodeGenerator::add` so binary ops only combine same-typed operands.
#[derive(PartialEq, Eq)]
pub enum ValueType {
    /// 32-bit signed integer.
    I32,
}

impl ValueType {
    /// Maps this tag to the corresponding LLVM type in the generator's context.
    unsafe fn get_llvm_type(&self, codegen: &mut CodeGenerator) -> LLVMTypeRef {
        match *self {
            Self::I32 => LLVMInt32TypeInContext(codegen.context),
        }
    }
}
/// A typed LLVM value: the [`ValueType`] tag plus the raw `LLVMValueRef`.
#[must_use = "value contains raw pointer and must be inserted somewhere"]
pub struct Value(ValueType, LLVMValueRef);
/// Owns the LLVM context/module/builder triple used for code generation.
/// The raw pointers are released by the `Drop` impl, in reverse creation order.
pub struct CodeGenerator {
    context: *mut LLVMContext,
    module: *mut LLVMModule,
    builder: *mut LLVMBuilder,
}
impl CodeGenerator {
    /// Creates a fresh LLVM context containing one module ("testmodule") and a
    /// builder for emitting instructions into it.
    pub fn new() -> CodeGenerator {
        unsafe {
            // Set up a context, module and builder in that context.
            let context = LLVMContextCreate();
            let module = LLVMModuleCreateWithNameInContext(cstr!("testmodule"), context);
            let builder = LLVMCreateBuilderInContext(context);
            CodeGenerator {
                context,
                module,
                builder,
            }
        }
    }

    /// Lowers a parsed literal into an LLVM constant of the matching type.
    pub fn get_const(&mut self, literal_type: &Literal) -> Value {
        unsafe {
            match *literal_type {
                Literal::I32(v) => Value(
                    ValueType::I32,
                    // `v as u64` sign-extends the i32, matching the SignExtend
                    // flag (final argument) of LLVMConstInt. This replaces the
                    // previous `mem::transmute(v as i64)`, which performed the
                    // same conversion with an unnecessary `transmute`.
                    LLVMConstInt(LLVMInt32TypeInContext(self.context), v as u64, 1),
                ),
            }
        }
    }

    /// Emits an integer addition of `lhs` and `rhs`.
    ///
    /// Returns `Err(())` when the operand type tags differ; otherwise the
    /// result carries the shared operand type.
    pub fn add(&mut self, lhs: Value, rhs: Value) -> Result<Value, ()> {
        unsafe {
            if lhs.0 == rhs.0 {
                Ok(Value(
                    lhs.0,
                    LLVMBuildAdd(self.builder, lhs.1, rhs.1, cstr!("tmpadd")),
                ))
            } else {
                Err(())
            }
        }
    }

    /// Wraps `ret` in a zero-argument function `_anon_func` whose body simply
    /// returns that value, leaving the builder positioned in its entry block.
    pub fn create_func(&mut self, ret: Value) {
        unsafe {
            let mut argts = [];
            let func_type = LLVMFunctionType(
                ret.0.get_llvm_type(self),
                argts.as_mut_ptr(),
                argts.len() as u32,
                0, // not variadic
            );
            let anon_func = LLVMAddFunction(self.module, cstr!("_anon_func"), func_type);

            // Create a basic block in the function and set our builder to generate
            // code in it.
            let bb = LLVMAppendBasicBlockInContext(self.context, anon_func, cstr!("entry"));
            LLVMPositionBuilderAtEnd(self.builder, bb);

            // Emit a `ret i32` into the function to return the computed value.
            // (Previous comment said `ret i64`; the only ValueType is I32.)
            LLVMBuildRet(self.builder, ret.1);
        }
    }
}
impl Drop for CodeGenerator {
    // Dumps the module, then releases builder, module and context in reverse
    // order of creation.
    // NOTE(review): dumping the module inside a destructor is debug output;
    // consider an explicit `dump()` method before this grows.
    fn drop(&mut self) {
        // Clean up. Values created in the context mostly get cleaned up there.
        unsafe {
            LLVMDisposeBuilder(self.builder);
            LLVMDumpModule(self.module);
            LLVMDisposeModule(self.module);
            LLVMContextDispose(self.context);
        }
    }
}

View File

@ -2,31 +2,123 @@ use std::{fmt::Debug, iter::Peekable, str::Chars};
static DECIMAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
/// A lexical token produced by the lexer.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum Token {
    // Values
    Identifier(String),
    /// Number with at most one decimal point
    DecimalValue(String),

    // Keywords
    /// `let`
    LetKeyword,
    /// `import`
    ImportKeyword,
    /// `return`
    ReturnKeyword,
    /// `fn`
    FnKeyword,

    // Symbols
    /// `;`
    Semi,
    /// `=`
    Equals,
    /// `:`
    Colon,
    /// `+`
    Plus,
    /// `*`
    Times,
    /// `(`
    ParenOpen,
    /// `)`
    ParenClose,
    /// `{`
    BraceOpen,
    /// `}`
    BraceClose,
    /// `,`
    Comma,

    /// End of the token stream.
    Eof,
}
impl Token {
    /// Infix binding power of this token when used as a binary operator.
    /// Higher binds tighter; `-1` marks tokens that are not operators.
    pub fn get_token_prec(&self) -> i8 {
        if *self == Token::Times {
            20
        } else if *self == Token::Plus {
            10
        } else {
            -1
        }
    }
}
/// A [`Token`] together with the source position it was read from.
#[derive(Clone)]
pub struct FullToken {
    pub token: Token,
    // (column, line) — see the `Position` alias; `.0` is the column,
    // `.1` the line (as maintained by `Cursor::next`).
    pub position: Position,
}
impl Debug for FullToken {
    /// Renders as `<token> (Ln <line>, Col <col>)`; note that the line is
    /// `position.1` and the column `position.0`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (col, line) = self.position;
        write!(f, "{:?} (Ln {}, Col {})", self.token, line, col)
    }
}
/// Source location as a `(column, line)` pair, maintained by `Cursor::next`.
pub type Position = (u32, u32);

// Sentinel character for end-of-input.
// NOTE(review): unused in the code visible here — confirm before removing.
const EOF_CHAR: char = '\0';
/// Character cursor over the source text, tracking the current
/// `(column, line)` position as characters are consumed.
pub struct Cursor<'a> {
    pub position: Position,
    char_stream: Chars<'a>,
}
impl<'a> Cursor<'a> {
    /// Consumes and returns the next character, updating `position`: the
    /// column advances on every character and resets (with a line bump)
    /// right after a newline.
    fn next(&mut self) -> Option<char> {
        let consumed = self.char_stream.next();
        self.position.0 += 1;
        if consumed == Some('\n') {
            self.position.1 += 1;
            self.position.0 = 0;
        }
        consumed
    }

    /// Peeks one character ahead without consuming anything.
    fn first(&mut self) -> Option<char> {
        // Cloning the `Chars` iterator is cheap; advancing the clone leaves
        // `self.char_stream` (and `position`) untouched.
        self.char_stream.clone().next()
    }

    /// Peeks two characters ahead without consuming anything.
    fn second(&mut self) -> Option<char> {
        let mut lookahead = self.char_stream.clone();
        lookahead.next();
        lookahead.next()
    }
}
pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, String> {
let to_tokenize = to_tokenize.into();
let mut position = (0, 1);
let mut cursor = Cursor {
char_stream: to_tokenize.chars().peekable(),
char_stream: to_tokenize.chars(),
position,
};
let mut tokens = Vec::new();
while let Some(character) = &cursor.next() {
position.0 += 1;
if *character == '\n' {
position.1 += 1;
position.0 = 0;
}
let peek = cursor.peek();
let variant = match character {
// Whitespace
w if w.is_whitespace() => continue,
// Comments
'/' if peek == Some(&'/') => {
while !matches!(&cursor.peek(), Some('\n')) {
'/' if cursor.first() == Some('/') => {
while !matches!(cursor.first(), Some('\n')) {
cursor.next();
}
continue;
@ -34,7 +126,7 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
// "words"
c if c.is_alphabetic() => {
let mut value = character.to_string();
while let Some(c) = &cursor.peek() {
while let Some(c) = cursor.first() {
if !c.is_ascii_alphanumeric() {
break;
}
@ -46,6 +138,8 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
let variant = match value.as_str() {
"let" => Token::LetKeyword,
"import" => Token::ImportKeyword,
"return" => Token::ReturnKeyword,
"fn" => Token::FnKeyword,
_ => Token::Identifier(value),
};
variant
@ -53,8 +147,8 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
// Decimals
c if DECIMAL_NUMERICS.contains(c) => {
let mut value = character.to_string();
while let Some(c) = &cursor.peek() {
if !DECIMAL_NUMERICS.contains(c) {
while let Some(c) = cursor.first() {
if !DECIMAL_NUMERICS.contains(&c) {
break;
}
value += &c.to_string();
@ -64,12 +158,14 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
}
// Single character tokens
'=' => Token::Equals,
';' => Token::Semicolon,
';' => Token::Semi,
':' => Token::Colon,
'+' => Token::Plus,
'*' => Token::Times,
'(' => Token::ParenOpen,
')' => Token::ParenClose,
'{' => Token::BraceOpen,
'}' => Token::BraceClose,
',' => Token::Comma,
// Invalid token
_ => Err(format!(
@ -93,74 +189,3 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
Ok(tokens)
}
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum Token {
// Values
Identifier(String),
/// Number with at most one decimal point
DecimalValue(String),
// Keywords
LetKeyword,
ImportKeyword,
// Symbols
Semicolon,
Equals,
Colon,
Plus,
Times,
ParenOpen, // (
ParenClose, // )
Comma,
Eof,
}
impl Token {
pub fn get_token_prec(&self) -> i8 {
match &self {
Token::Plus => 10,
Token::Times => 20,
_ => -1,
}
}
}
pub struct FullToken {
pub token: Token,
position: Position,
}
impl Debug for FullToken {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_fmt(format_args!(
"{:?} (Ln {}, Col {})",
self.token, self.position.1, self.position.0
))
}
}
pub type Position = (u32, u32);
pub struct Cursor<'a> {
pub position: Position,
char_stream: Peekable<Chars<'a>>,
}
impl<'a> Cursor<'a> {
fn next(&mut self) -> Option<char> {
let next = self.char_stream.next();
self.position.0 += 1;
if let Some('\n') = next {
self.position.1 += 1;
self.position.0 = 0;
}
next
}
fn peek(&mut self) -> Option<&char> {
self.char_stream.peek()
}
}

View File

@ -1,24 +1,40 @@
use crate::{lexer::Token, parser::TopLevelStatement, token_stream::TokenStream};
use crate::{
codegen::CodeGenerator, lexer::Token, parser::TopLevelStatement, token_stream::TokenStream,
};
pub static EASIEST: &str = include_str!("../reid/easiest.reid");
pub static EASY: &str = include_str!("../reid/easy.reid");
pub static MEDIUM: &str = include_str!("../reid/medium.reid");
pub static HARD: &str = include_str!("../reid/hard.reid");
mod codegen;
mod lexer;
mod parser;
mod token_stream;
// TODO:
// 1. Make it so that TopLevelStatement can only be import or function def
// 2. Make BlockLevelStatement, that has everything TopLevelStatement has now
// 3. Make it so all codegen is done with a Block-struct, that represents a
// single proper block
fn main() {
let tokens = lexer::tokenize(EASY).unwrap();
let tokens = lexer::tokenize(EASIEST).unwrap();
dbg!(&tokens);
let mut token_stream = TokenStream::from(&tokens);
while let Ok(statement) = token_stream.parse::<TopLevelStatement>() {
while !matches!(token_stream.peek().unwrap_or(Token::Eof), Token::Eof) {
let statement = token_stream.parse::<TopLevelStatement>().unwrap();
dbg!(&statement);
}
dbg!(token_stream.expect(Token::Eof).ok());
let mut c = CodeGenerator::new();
let x = c.get_const(&parser::Literal::I32(3));
let y = c.get_const(&parser::Literal::I32(4));
let add = c.add(x, y).unwrap();
c.create_func(add);
// dbg!(token_stream.expect(Token::Eof).ok());
}

View File

@ -7,12 +7,22 @@ where
fn parse(stream: TokenStream) -> Result<Self, ()>;
}
/// A literal constant appearing in source code.
#[derive(Debug, Clone)]
pub enum Literal {
    /// 32-bit signed integer literal.
    I32(i32),
}
/// Binary operators recognized by the expression parser
/// (precedences live in `Token::get_token_prec`).
#[derive(Debug, Clone)]
pub enum BinaryOperator {
    Add,
    Mult,
}
#[derive(Debug, Clone)]
pub enum Expression {
VariableName(String),
ContantI32(i32),
BinopAdd(Box<Expression>, Box<Expression>),
BinopMult(Box<Expression>, Box<Expression>),
Literal(Literal),
Binop(BinaryOperator, Box<Expression>, Box<Expression>),
FunctionCall(Box<FunctionCallExpression>),
}
@ -29,7 +39,7 @@ fn parse_primary_expression(stream: &mut TokenStream) -> Result<Expression, ()>
} else if let Some(token) = stream.next() {
Ok(match &token {
Token::Identifier(v) => Expression::VariableName(v.clone()),
Token::DecimalValue(v) => Expression::ContantI32(v.parse().unwrap()),
Token::DecimalValue(v) => Expression::Literal(Literal::I32(v.parse().unwrap())),
_ => Err(())?, // TODO: Add error raporting!
})
} else {
@ -66,9 +76,11 @@ fn parse_binop_rhs(
}
}
use BinaryOperator::*;
lhs = match &token {
Token::Plus => Expression::BinopAdd(Box::new(lhs), Box::new(rhs)),
Token::Times => Expression::BinopMult(Box::new(lhs), Box::new(rhs)),
Token::Plus => Expression::Binop(Add, Box::new(lhs), Box::new(rhs)),
Token::Times => Expression::Binop(Mult, Box::new(lhs), Box::new(rhs)),
_ => Err(())?, // TODO: Add error raporting!
};
}
@ -106,24 +118,17 @@ impl Parse for FunctionCallExpression {
#[derive(Debug)]
pub enum TopLevelStatement {
Let(LetStatement),
Import(ImportStatement),
TLExpression(Expression),
FunctionDefinition(FunctionDefinition),
}
impl Parse for TopLevelStatement {
fn parse(mut stream: TokenStream) -> Result<Self, ()> {
use TopLevelStatement as Stmt;
Ok(match stream.peek() {
Some(Token::LetKeyword) => TopLevelStatement::Let(stream.parse()?),
Some(Token::ImportKeyword) => TopLevelStatement::Import(stream.parse()?),
_ => {
if let Ok(e) = stream.parse() {
stream.expect(Token::Semicolon)?;
TopLevelStatement::TLExpression(e)
} else {
Err(())? // TODO: Add error raporting!
}
}
Some(Token::ImportKeyword) => Stmt::Import(stream.parse()?),
Some(Token::FnKeyword) => Stmt::FunctionDefinition(stream.parse()?),
_ => Err(())?, // TODO: Add error raporting!
})
}
}
@ -139,7 +144,7 @@ impl Parse for LetStatement {
stream.expect(Token::Equals)?;
let expression = stream.parse()?;
stream.expect(Token::Semicolon)?;
stream.expect(Token::Semi)?;
Ok(LetStatement(variable, expression))
} else {
Err(()) // TODO: Add error raporting!
@ -169,8 +174,87 @@ impl Parse for ImportStatement {
Err(())? // TODO: Add error raporting!
}
stream.expect(Token::Semicolon)?;
stream.expect(Token::Semi)?;
Ok(ImportStatement(import_list))
}
}
/// A parsed `fn` item: its signature plus its body block.
#[derive(Debug)]
pub struct FunctionDefinition(FunctionSignature, Block);

impl Parse for FunctionDefinition {
    /// Parses `fn <signature> <block>`.
    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
        stream.expect(Token::FnKeyword)?;
        Ok(FunctionDefinition(stream.parse()?, stream.parse()?))
    }
}
/// A function's signature. Only the name for now — parameters are not
/// supported yet, so the source form is `name()`.
#[derive(Debug)]
pub struct FunctionSignature {
    name: String,
}

impl Parse for FunctionSignature {
    /// Parses `<identifier> ( )`; anything else is an error.
    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
        match stream.next() {
            Some(Token::Identifier(name)) => {
                stream.expect(Token::ParenOpen)?;
                stream.expect(Token::ParenClose)?;
                Ok(FunctionSignature { name })
            }
            _ => Err(()), // TODO: Add error reporting!
        }
    }
}
/// A `{ … }` sequence of block-level statements.
#[derive(Debug)]
pub struct Block(Vec<BlockLevelStatement>);

impl Parse for Block {
    /// Parses `{ <statement>* }`. Statement parsing stops early after a
    /// `return`, which therefore must be the last statement of the block.
    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
        stream.expect(Token::BraceOpen)?;
        let mut statements = Vec::new();
        while !matches!(stream.peek(), Some(Token::BraceClose)) {
            let statement = stream.parse()?;
            let terminates = matches!(statement, BlockLevelStatement::Return(_));
            statements.push(statement);
            if terminates {
                // Return has to be the last statement.
                break;
            }
        }
        stream.expect(Token::BraceClose)?;
        Ok(Block(statements))
    }
}
/// Statements allowed inside a function body block.
#[derive(Debug)]
pub enum BlockLevelStatement {
    Let(LetStatement),
    Import(ImportStatement),
    /// A bare expression terminated by `;`.
    Expression(Expression),
    /// `return <expr>;` — `Block::parse` stops after one of these.
    Return(Expression),
}
impl Parse for BlockLevelStatement {
    /// Dispatches on the next token: `let`/`import` delegate to their own
    /// parsers, `return <expr>;` is consumed inline, and anything else is
    /// tried as an expression statement (`<expr>;`).
    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
        use BlockLevelStatement as Stmt;
        match stream.peek() {
            Some(Token::LetKeyword) => Ok(Stmt::Let(stream.parse()?)),
            Some(Token::ImportKeyword) => Ok(Stmt::Import(stream.parse()?)),
            Some(Token::ReturnKeyword) => {
                stream.next(); // consume the `return` keyword
                let expression = stream.parse()?;
                stream.expect(Token::Semi)?;
                Ok(Stmt::Return(expression))
            }
            _ => match stream.parse() {
                Ok(expression) => {
                    stream.expect(Token::Semi)?;
                    Ok(Stmt::Expression(expression))
                }
                Err(_) => Err(()), // TODO: Add error reporting!
            },
        }
    }
}