Add preliminary codegen, update parsing to require functions
This commit is contained in:
parent
17ab6a0cea
commit
dca604a038
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,2 +1,3 @@
|
||||
/target
|
||||
/.vscode
|
||||
/.vscode
|
||||
.env
|
@ -1,4 +1,8 @@
|
||||
// Hello, comment here!
|
||||
let hello = 32;
|
||||
let beep =
|
||||
hello ;
|
||||
|
||||
fn main() {
|
||||
let hello = 32;
|
||||
let beep =
|
||||
hello ;
|
||||
return beep;
|
||||
}
|
112
src/codegen.rs
Normal file
112
src/codegen.rs
Normal file
@ -0,0 +1,112 @@
|
||||
use std::mem;
|
||||
|
||||
use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule};
|
||||
|
||||
use crate::parser::Literal;
|
||||
|
||||
/// Expands a string literal into a NUL-terminated `*const c_char` for C APIs.
///
/// The terminator is appended at compile time with `concat!`, so the pointer
/// refers to `'static` data. Producing the pointer is a plain cast and needs
/// no `unsafe` at the expansion site (dereferencing it still does).
///
/// The literal is not checked for interior NUL bytes; a C consumer will stop
/// reading at the first one.
macro_rules! cstr {
    ($string:expr) => {
        concat!($string, "\0").as_ptr().cast::<core::ffi::c_char>()
    };
}
|
||||
|
||||
/// The language-level type attached to a generated [`Value`].
///
/// Only 32-bit integers exist so far. The type is a plain tag, so it is
/// `Copy` and can be printed with `{:?}` in diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValueType {
    /// 32-bit integer.
    I32,
}
|
||||
|
||||
impl ValueType {
|
||||
unsafe fn get_llvm_type(&self, codegen: &mut CodeGenerator) -> LLVMTypeRef {
|
||||
match *self {
|
||||
Self::I32 => LLVMInt32TypeInContext(codegen.context),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use = "value contains raw pointer and must be inserted somewhere"]
|
||||
pub struct Value(ValueType, LLVMValueRef);
|
||||
|
||||
pub struct CodeGenerator {
|
||||
context: *mut LLVMContext,
|
||||
module: *mut LLVMModule,
|
||||
builder: *mut LLVMBuilder,
|
||||
}
|
||||
|
||||
impl CodeGenerator {
|
||||
pub fn new() -> CodeGenerator {
|
||||
unsafe {
|
||||
// Set up a context, module and builder in that context.
|
||||
let context = LLVMContextCreate();
|
||||
let module = LLVMModuleCreateWithNameInContext(cstr!("testmodule"), context);
|
||||
let builder = LLVMCreateBuilderInContext(context);
|
||||
|
||||
CodeGenerator {
|
||||
context,
|
||||
module,
|
||||
builder,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_const(&mut self, literal_type: &Literal) -> Value {
|
||||
unsafe {
|
||||
match *literal_type {
|
||||
Literal::I32(v) => Value(
|
||||
ValueType::I32,
|
||||
LLVMConstInt(
|
||||
LLVMInt32TypeInContext(self.context),
|
||||
mem::transmute(v as i64),
|
||||
1,
|
||||
),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add(&mut self, lhs: Value, rhs: Value) -> Result<Value, ()> {
|
||||
unsafe {
|
||||
if lhs.0 == rhs.0 {
|
||||
Ok(Value(
|
||||
lhs.0,
|
||||
LLVMBuildAdd(self.builder, lhs.1, rhs.1, cstr!("tmpadd")),
|
||||
))
|
||||
} else {
|
||||
Err(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_func(&mut self, ret: Value) {
|
||||
unsafe {
|
||||
let mut argts = [];
|
||||
let func_type = LLVMFunctionType(
|
||||
ret.0.get_llvm_type(self),
|
||||
argts.as_mut_ptr(),
|
||||
argts.len() as u32,
|
||||
0,
|
||||
);
|
||||
|
||||
let anon_func = LLVMAddFunction(self.module, cstr!("_anon_func"), func_type);
|
||||
|
||||
// Create a basic block in the function and set our builder to generate
|
||||
// code in it.
|
||||
let bb = LLVMAppendBasicBlockInContext(self.context, anon_func, cstr!("entry"));
|
||||
LLVMPositionBuilderAtEnd(self.builder, bb);
|
||||
|
||||
// Emit a `ret i64` into the function to return the computed sum.
|
||||
LLVMBuildRet(self.builder, ret.1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for CodeGenerator {
|
||||
fn drop(&mut self) {
|
||||
// Clean up. Values created in the context mostly get cleaned up there.
|
||||
unsafe {
|
||||
LLVMDisposeBuilder(self.builder);
|
||||
LLVMDumpModule(self.module);
|
||||
LLVMDisposeModule(self.module);
|
||||
LLVMContextDispose(self.context);
|
||||
}
|
||||
}
|
||||
}
|
197
src/lexer.rs
197
src/lexer.rs
@ -2,31 +2,123 @@ use std::{fmt::Debug, iter::Peekable, str::Chars};
|
||||
|
||||
/// The ASCII digits `0` through `9`, i.e. the characters accepted as part of a decimal number.
static DECIMAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
|
||||
|
||||
/// A single lexical token, without any position information.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum Token {
    // Values
    /// A name that is not one of the keywords below.
    Identifier(String),
    /// Number with at most one decimal point
    DecimalValue(String),

    // Keywords
    /// `let`
    LetKeyword,
    /// `import`
    ImportKeyword,
    /// `return`
    ReturnKeyword,
    /// `fn`
    FnKeyword,

    // Symbols
    /// `;`
    Semi,
    /// `=`
    Equals,
    /// `:`
    Colon,
    /// `+`
    Plus,
    /// `*`
    Times,
    /// `(`
    ParenOpen,
    /// `)`
    ParenClose,
    /// `{`
    BraceOpen,
    /// `}`
    BraceClose,
    /// `,`
    Comma,

    /// End-of-input marker.
    Eof,
}

impl Token {
    /// Returns the binary-operator precedence of this token.
    ///
    /// `*` (20) ranks above `+` (10); every other token yields `-1`.
    pub fn get_token_prec(&self) -> i8 {
        match self {
            Token::Times => 20,
            Token::Plus => 10,
            _ => -1,
        }
    }
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct FullToken {
|
||||
pub token: Token,
|
||||
pub position: Position,
|
||||
}
|
||||
|
||||
impl Debug for FullToken {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_fmt(format_args!(
|
||||
"{:?} (Ln {}, Col {})",
|
||||
self.token, self.position.1, self.position.0
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// A source position. [`Cursor::next`] treats `.0` as the column counter and
/// `.1` as the line counter.
pub type Position = (u32, u32);

/// NUL character constant.
const EOF_CHAR: char = '\0';

/// Character reader over a string slice that keeps track of its position.
pub struct Cursor<'a> {
    /// Position after the most recent call to `next`.
    pub position: Position,
    /// Characters that have not been consumed yet.
    char_stream: Chars<'a>,
}

impl<'a> Cursor<'a> {
    /// Consumes and returns the next character, if any.
    ///
    /// The column counter is bumped on every call. When the consumed character
    /// is a newline, the line counter is bumped and the column reset to 0.
    fn next(&mut self) -> Option<char> {
        let consumed = self.char_stream.next();
        self.position.0 += 1;
        if consumed == Some('\n') {
            self.position.1 += 1;
            self.position.0 = 0;
        }
        consumed
    }

    /// Returns the next character without consuming it.
    fn first(&mut self) -> Option<char> {
        let mut lookahead = self.char_stream.clone();
        lookahead.next()
    }

    /// Returns the character after the next one without consuming anything.
    fn second(&mut self) -> Option<char> {
        let mut lookahead = self.char_stream.clone();
        let _skipped = lookahead.next();
        lookahead.next()
    }
}
|
||||
|
||||
pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, String> {
|
||||
let to_tokenize = to_tokenize.into();
|
||||
let mut position = (0, 1);
|
||||
let mut cursor = Cursor {
|
||||
char_stream: to_tokenize.chars().peekable(),
|
||||
char_stream: to_tokenize.chars(),
|
||||
position,
|
||||
};
|
||||
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
while let Some(character) = &cursor.next() {
|
||||
position.0 += 1;
|
||||
if *character == '\n' {
|
||||
position.1 += 1;
|
||||
position.0 = 0;
|
||||
}
|
||||
|
||||
let peek = cursor.peek();
|
||||
|
||||
let variant = match character {
|
||||
// Whitespace
|
||||
w if w.is_whitespace() => continue,
|
||||
// Comments
|
||||
'/' if peek == Some(&'/') => {
|
||||
while !matches!(&cursor.peek(), Some('\n')) {
|
||||
'/' if cursor.first() == Some('/') => {
|
||||
while !matches!(cursor.first(), Some('\n')) {
|
||||
cursor.next();
|
||||
}
|
||||
continue;
|
||||
@ -34,7 +126,7 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
|
||||
// "words"
|
||||
c if c.is_alphabetic() => {
|
||||
let mut value = character.to_string();
|
||||
while let Some(c) = &cursor.peek() {
|
||||
while let Some(c) = cursor.first() {
|
||||
if !c.is_ascii_alphanumeric() {
|
||||
break;
|
||||
}
|
||||
@ -46,6 +138,8 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
|
||||
let variant = match value.as_str() {
|
||||
"let" => Token::LetKeyword,
|
||||
"import" => Token::ImportKeyword,
|
||||
"return" => Token::ReturnKeyword,
|
||||
"fn" => Token::FnKeyword,
|
||||
_ => Token::Identifier(value),
|
||||
};
|
||||
variant
|
||||
@ -53,8 +147,8 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
|
||||
// Decimals
|
||||
c if DECIMAL_NUMERICS.contains(c) => {
|
||||
let mut value = character.to_string();
|
||||
while let Some(c) = &cursor.peek() {
|
||||
if !DECIMAL_NUMERICS.contains(c) {
|
||||
while let Some(c) = cursor.first() {
|
||||
if !DECIMAL_NUMERICS.contains(&c) {
|
||||
break;
|
||||
}
|
||||
value += &c.to_string();
|
||||
@ -64,12 +158,14 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
|
||||
}
|
||||
// Single character tokens
|
||||
'=' => Token::Equals,
|
||||
';' => Token::Semicolon,
|
||||
';' => Token::Semi,
|
||||
':' => Token::Colon,
|
||||
'+' => Token::Plus,
|
||||
'*' => Token::Times,
|
||||
'(' => Token::ParenOpen,
|
||||
')' => Token::ParenClose,
|
||||
'{' => Token::BraceOpen,
|
||||
'}' => Token::BraceClose,
|
||||
',' => Token::Comma,
|
||||
// Invalid token
|
||||
_ => Err(format!(
|
||||
@ -93,74 +189,3 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Strin
|
||||
|
||||
Ok(tokens)
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq, Clone)]
|
||||
pub enum Token {
|
||||
// Values
|
||||
Identifier(String),
|
||||
/// Number with at most one decimal point
|
||||
DecimalValue(String),
|
||||
|
||||
// Keywords
|
||||
LetKeyword,
|
||||
ImportKeyword,
|
||||
|
||||
// Symbols
|
||||
Semicolon,
|
||||
Equals,
|
||||
Colon,
|
||||
Plus,
|
||||
Times,
|
||||
ParenOpen, // (
|
||||
ParenClose, // )
|
||||
Comma,
|
||||
|
||||
Eof,
|
||||
}
|
||||
|
||||
impl Token {
|
||||
pub fn get_token_prec(&self) -> i8 {
|
||||
match &self {
|
||||
Token::Plus => 10,
|
||||
Token::Times => 20,
|
||||
_ => -1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FullToken {
|
||||
pub token: Token,
|
||||
position: Position,
|
||||
}
|
||||
|
||||
impl Debug for FullToken {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_fmt(format_args!(
|
||||
"{:?} (Ln {}, Col {})",
|
||||
self.token, self.position.1, self.position.0
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
pub type Position = (u32, u32);
|
||||
|
||||
pub struct Cursor<'a> {
|
||||
pub position: Position,
|
||||
char_stream: Peekable<Chars<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> Cursor<'a> {
|
||||
fn next(&mut self) -> Option<char> {
|
||||
let next = self.char_stream.next();
|
||||
self.position.0 += 1;
|
||||
if let Some('\n') = next {
|
||||
self.position.1 += 1;
|
||||
self.position.0 = 0;
|
||||
}
|
||||
next
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Option<&char> {
|
||||
self.char_stream.peek()
|
||||
}
|
||||
}
|
||||
|
24
src/main.rs
24
src/main.rs
@ -1,24 +1,40 @@
|
||||
use crate::{lexer::Token, parser::TopLevelStatement, token_stream::TokenStream};
|
||||
use crate::{
|
||||
codegen::CodeGenerator, lexer::Token, parser::TopLevelStatement, token_stream::TokenStream,
|
||||
};
|
||||
|
||||
pub static EASIEST: &str = include_str!("../reid/easiest.reid");
|
||||
pub static EASY: &str = include_str!("../reid/easy.reid");
|
||||
pub static MEDIUM: &str = include_str!("../reid/medium.reid");
|
||||
pub static HARD: &str = include_str!("../reid/hard.reid");
|
||||
|
||||
mod codegen;
|
||||
mod lexer;
|
||||
mod parser;
|
||||
mod token_stream;
|
||||
|
||||
// TODO:
|
||||
// 1. Make it so that TopLevelStatement can only be import or function def
|
||||
// 2. Make BlockLevelStatement, that has everything TopLevelStatement has now
|
||||
// 3. Make it so all codegen is done with a Block-struct, that represents a
|
||||
// single proper block
|
||||
|
||||
fn main() {
|
||||
let tokens = lexer::tokenize(EASY).unwrap();
|
||||
let tokens = lexer::tokenize(EASIEST).unwrap();
|
||||
|
||||
dbg!(&tokens);
|
||||
|
||||
let mut token_stream = TokenStream::from(&tokens);
|
||||
|
||||
while let Ok(statement) = token_stream.parse::<TopLevelStatement>() {
|
||||
while !matches!(token_stream.peek().unwrap_or(Token::Eof), Token::Eof) {
|
||||
let statement = token_stream.parse::<TopLevelStatement>().unwrap();
|
||||
dbg!(&statement);
|
||||
}
|
||||
|
||||
dbg!(token_stream.expect(Token::Eof).ok());
|
||||
let mut c = CodeGenerator::new();
|
||||
let x = c.get_const(&parser::Literal::I32(3));
|
||||
let y = c.get_const(&parser::Literal::I32(4));
|
||||
let add = c.add(x, y).unwrap();
|
||||
c.create_func(add);
|
||||
|
||||
// dbg!(token_stream.expect(Token::Eof).ok());
|
||||
}
|
||||
|
124
src/parser.rs
124
src/parser.rs
@ -7,12 +7,22 @@ where
|
||||
fn parse(stream: TokenStream) -> Result<Self, ()>;
|
||||
}
|
||||
|
||||
/// A literal value as it appears in the parsed source.
#[derive(Debug, Clone)]
pub enum Literal {
    /// 32-bit signed integer literal.
    I32(i32),
}
|
||||
|
||||
/// The operator of a binary expression.
#[derive(Debug, Clone)]
pub enum BinaryOperator {
    /// Addition.
    Add,
    /// Multiplication.
    Mult,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Expression {
|
||||
VariableName(String),
|
||||
ContantI32(i32),
|
||||
BinopAdd(Box<Expression>, Box<Expression>),
|
||||
BinopMult(Box<Expression>, Box<Expression>),
|
||||
Literal(Literal),
|
||||
Binop(BinaryOperator, Box<Expression>, Box<Expression>),
|
||||
FunctionCall(Box<FunctionCallExpression>),
|
||||
}
|
||||
|
||||
@ -29,7 +39,7 @@ fn parse_primary_expression(stream: &mut TokenStream) -> Result<Expression, ()>
|
||||
} else if let Some(token) = stream.next() {
|
||||
Ok(match &token {
|
||||
Token::Identifier(v) => Expression::VariableName(v.clone()),
|
||||
Token::DecimalValue(v) => Expression::ContantI32(v.parse().unwrap()),
|
||||
Token::DecimalValue(v) => Expression::Literal(Literal::I32(v.parse().unwrap())),
|
||||
_ => Err(())?, // TODO: Add error raporting!
|
||||
})
|
||||
} else {
|
||||
@ -66,9 +76,11 @@ fn parse_binop_rhs(
|
||||
}
|
||||
}
|
||||
|
||||
use BinaryOperator::*;
|
||||
|
||||
lhs = match &token {
|
||||
Token::Plus => Expression::BinopAdd(Box::new(lhs), Box::new(rhs)),
|
||||
Token::Times => Expression::BinopMult(Box::new(lhs), Box::new(rhs)),
|
||||
Token::Plus => Expression::Binop(Add, Box::new(lhs), Box::new(rhs)),
|
||||
Token::Times => Expression::Binop(Mult, Box::new(lhs), Box::new(rhs)),
|
||||
_ => Err(())?, // TODO: Add error raporting!
|
||||
};
|
||||
}
|
||||
@ -106,24 +118,17 @@ impl Parse for FunctionCallExpression {
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum TopLevelStatement {
|
||||
Let(LetStatement),
|
||||
Import(ImportStatement),
|
||||
TLExpression(Expression),
|
||||
FunctionDefinition(FunctionDefinition),
|
||||
}
|
||||
|
||||
impl Parse for TopLevelStatement {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, ()> {
|
||||
use TopLevelStatement as Stmt;
|
||||
Ok(match stream.peek() {
|
||||
Some(Token::LetKeyword) => TopLevelStatement::Let(stream.parse()?),
|
||||
Some(Token::ImportKeyword) => TopLevelStatement::Import(stream.parse()?),
|
||||
_ => {
|
||||
if let Ok(e) = stream.parse() {
|
||||
stream.expect(Token::Semicolon)?;
|
||||
TopLevelStatement::TLExpression(e)
|
||||
} else {
|
||||
Err(())? // TODO: Add error raporting!
|
||||
}
|
||||
}
|
||||
Some(Token::ImportKeyword) => Stmt::Import(stream.parse()?),
|
||||
Some(Token::FnKeyword) => Stmt::FunctionDefinition(stream.parse()?),
|
||||
_ => Err(())?, // TODO: Add error raporting!
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -139,7 +144,7 @@ impl Parse for LetStatement {
|
||||
stream.expect(Token::Equals)?;
|
||||
|
||||
let expression = stream.parse()?;
|
||||
stream.expect(Token::Semicolon)?;
|
||||
stream.expect(Token::Semi)?;
|
||||
Ok(LetStatement(variable, expression))
|
||||
} else {
|
||||
Err(()) // TODO: Add error raporting!
|
||||
@ -169,8 +174,87 @@ impl Parse for ImportStatement {
|
||||
Err(())? // TODO: Add error raporting!
|
||||
}
|
||||
|
||||
stream.expect(Token::Semicolon)?;
|
||||
stream.expect(Token::Semi)?;
|
||||
|
||||
Ok(ImportStatement(import_list))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct FunctionDefinition(FunctionSignature, Block);
|
||||
|
||||
impl Parse for FunctionDefinition {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, ()> {
|
||||
stream.expect(Token::FnKeyword)?;
|
||||
Ok(FunctionDefinition(stream.parse()?, stream.parse()?))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct FunctionSignature {
|
||||
name: String,
|
||||
}
|
||||
|
||||
impl Parse for FunctionSignature {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, ()> {
|
||||
if let Some(Token::Identifier(name)) = stream.next() {
|
||||
stream.expect(Token::ParenOpen)?;
|
||||
stream.expect(Token::ParenClose)?;
|
||||
Ok(FunctionSignature { name })
|
||||
} else {
|
||||
Err(()) // TODO: Add error raporting!
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Block(Vec<BlockLevelStatement>);
|
||||
|
||||
impl Parse for Block {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, ()> {
|
||||
let mut statements = Vec::new();
|
||||
stream.expect(Token::BraceOpen)?;
|
||||
while !matches!(stream.peek(), Some(Token::BraceClose)) {
|
||||
let statement = stream.parse()?;
|
||||
if let BlockLevelStatement::Return(_) = &statement {
|
||||
statements.push(statement);
|
||||
break; // Return has to be the last statement
|
||||
}
|
||||
statements.push(statement);
|
||||
}
|
||||
stream.expect(Token::BraceClose)?;
|
||||
Ok(Block(statements))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum BlockLevelStatement {
|
||||
Let(LetStatement),
|
||||
Import(ImportStatement),
|
||||
Expression(Expression),
|
||||
Return(Expression),
|
||||
}
|
||||
|
||||
impl Parse for BlockLevelStatement {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, ()> {
|
||||
use BlockLevelStatement as Stmt;
|
||||
Ok(match stream.peek() {
|
||||
Some(Token::LetKeyword) => Stmt::Let(stream.parse()?),
|
||||
Some(Token::ImportKeyword) => Stmt::Import(stream.parse()?),
|
||||
Some(Token::ReturnKeyword) => {
|
||||
stream.next();
|
||||
let exp = stream.parse()?;
|
||||
stream.expect(Token::Semi)?;
|
||||
Stmt::Return(exp)
|
||||
}
|
||||
_ => {
|
||||
if let Ok(e) = stream.parse() {
|
||||
stream.expect(Token::Semi)?;
|
||||
Stmt::Expression(e)
|
||||
} else {
|
||||
Err(())? // TODO: Add error raporting!
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user