Compare commits
24 Commits
42e74f49f8
...
main
Author | SHA1 | Date | |
---|---|---|---|
47b9d7e044 | |||
5084f21ff9 | |||
4c8417cbee | |||
420fd7b74b | |||
413cd87a02 | |||
f22505be91 | |||
4e89cd7355 | |||
9710406747 | |||
91a6485d5a | |||
9b7a31c988 | |||
c7f11e5091 | |||
e9aab4f43e | |||
2ced45eb40 | |||
08909d76ee | |||
9019e1e5a7 | |||
817b7c2096 | |||
612b4d63a8 | |||
297ff832b5 | |||
a196857dd6 | |||
a9d5302774 | |||
9d7a63f25b | |||
dca604a038 | |||
17ab6a0cea | |||
bda4fa4200 |
2
.gitignore
vendored
2
.gitignore
vendored
@ -1 +1,3 @@
|
||||
/target
|
||||
/.vscode
|
||||
.env
|
140
Cargo.lock
generated
140
Cargo.lock
generated
@ -2,6 +2,146 @@
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.79"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.147"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
|
||||
|
||||
[[package]]
|
||||
name = "llvm-sys"
|
||||
version = "160.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0bf51981ac0622b10fe4790763e3de1f3d68a0ee4222e03accaaab6731bd508d"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"regex",
|
||||
"semver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.66"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b7b6d6190b7594385f61bd3911cd1be99dfddcfc365a4160cc2ab5bff4aed294"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
|
||||
|
||||
[[package]]
|
||||
name = "reid"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"llvm-sys",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "1.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
|
||||
|
@ -6,3 +6,7 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
## LLVM Bindings
|
||||
llvm-sys = "160"
|
||||
## Make it easier to generate errors
|
||||
thiserror = "1.0.44"
|
61
README.md
Normal file
61
README.md
Normal file
@ -0,0 +1,61 @@
|
||||
# Reid-LLVM
|
||||
Attempt at re-creating Reid, this time using LLVM.
|
||||
|
||||
## Various notes in order to get this working properly
|
||||
This is what worked for me; it might not (and probably won't) work for you as-is, depending on
|
||||
various versions of various libraries.
|
||||
|
||||
### Compiling LLVM 16.0.0
|
||||
|
||||
#### Context
|
||||
Context for my computer. I am on ArchLinux, and here are some libraries and
|
||||
their versions that I had installed at the time of compiling. I'm not sure which
|
||||
of them are relevant, if any, but saving them here still feels like a good idea
|
||||
for the future:
|
||||
- `cmake 3.27.0-1`
|
||||
- `lib32-llvm-libs 15.0.7-1`
|
||||
- `llvm 15.0.7-3`
|
||||
- `llvm-libs 15.0.7-3`
|
||||
- `gcc 13.1.1-2`
|
||||
- `gcc-libs 13.1.1-2`
|
||||
- `lib32-gcc-libs 13.1.1-2`
|
||||
- `lld 15.0.7-2`
|
||||
- `lldb 15.0.7-3`
|
||||
- `clang 15.0.7-9`
|
||||
- `make 4.4.1-2`
|
||||
- `automake 1.16.5-2`
|
||||
|
||||
#### Commands
|
||||
|
||||
```sh
|
||||
wget https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.0/llvm-16.0.0.src.tar.xz
|
||||
wget https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.0/cmake-16.0.0.src.tar.xz
|
||||
|
||||
tar xvf llvm-16.0.0.src.tar.xz
|
||||
tar xvf cmake-16.0.0.src.tar.xz
|
||||
|
||||
mv cmake-16.0.0.src cmake
|
||||
|
||||
cd llvm-16.0.0.src
|
||||
|
||||
cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/llvm-16 -DCMAKE_BUILD_TYPE=MinSizeRel -DLLVM_ENABLE_ASSERTIONS=ON -DLLVM_INCLUDE_TESTS=OFF
|
||||
|
||||
make -C build -j8
|
||||
```
|
||||
|
||||
*Also Note:* Building LLVM with `Ninja` was not successful for me, but this
|
||||
method was. Ninja may work for you; to try it, add `-G Ninja` to the
|
||||
`cmake`-command, and instead of `make` run `ninja install`.
|
||||
|
||||
### Building this crate itself
|
||||
|
||||
Assuming `llvm-16.0.0.src` from the previous step was at
|
||||
`/path/llvm-16.0.0.src`, building this crate can be done via the following command:
|
||||
|
||||
```sh
|
||||
LLVM_SYS_160_PREFIX=/path/llvm-16.0.0.src/build cargo build
|
||||
```
|
||||
|
||||
## In conclusion
|
||||
Good luck! It took me a good 10 hours to figure this out for myself; I sure hope
|
||||
these instructions help both myself and someone else in the future!
|
@ -1,4 +0,0 @@
|
||||
// Hello, comment here!
|
||||
let hello = 32;
|
||||
let beep =
|
||||
hello ;
|
12
easy.reid
12
easy.reid
@ -1,12 +0,0 @@
|
||||
// Arithmetic, function calls and imports!
|
||||
|
||||
import std::print;
|
||||
|
||||
let test = 5;
|
||||
let simpleAdd = 2 + 2;
|
||||
let arithmetic = 3 + 2 * 5 + 1 * 2;
|
||||
let multiplier = 5 * 2;
|
||||
|
||||
let result = arithmetic + multiplier * arithmetic;
|
||||
print(result);
|
||||
function(one, two);
|
11
examples/easiest.rs
Normal file
11
examples/easiest.rs
Normal file
@ -0,0 +1,11 @@
|
||||
use reid::compile;
|
||||
|
||||
pub static EASIEST: &str = include_str!("./reid/easiest.reid");
|
||||
|
||||
fn main() {
|
||||
let text = match compile(EASIEST) {
|
||||
Ok(t) => t,
|
||||
Err(e) => panic!("{}", e),
|
||||
};
|
||||
println!("{}", text);
|
||||
}
|
20
examples/reid/easiest.reid
Normal file
20
examples/reid/easiest.reid
Normal file
@ -0,0 +1,20 @@
|
||||
// Hello, comment here!
|
||||
|
||||
import std::print;
|
||||
|
||||
fn main() {
|
||||
let hello = 32 + {
|
||||
2 + 3
|
||||
};
|
||||
let beep = hello + fibonacci();
|
||||
return beep;
|
||||
}
|
||||
|
||||
// Fibonacci
|
||||
|
||||
fn fibonacci(value: i32) -> i32 {
|
||||
if value < 3 {
|
||||
return 1;
|
||||
}
|
||||
return fibonacci(value - 1) + fibonacci(value - 2);
|
||||
}
|
12
examples/reid/easy.reid
Normal file
12
examples/reid/easy.reid
Normal file
@ -0,0 +1,12 @@
|
||||
// Arithmetic, function calls and imports!
|
||||
|
||||
import std::print;
|
||||
|
||||
fn main() {
|
||||
let test = 5;
|
||||
let simpleAdd = 2 + 2;
|
||||
let arithmetic = 3 + 2 * 5 + 1 * 2;
|
||||
let multiplier = 5 * 2;
|
||||
|
||||
return arithmetic + multiplier * arithmetic;
|
||||
}
|
388
src/ast.rs
Normal file
388
src/ast.rs
Normal file
@ -0,0 +1,388 @@
|
||||
use crate::{
|
||||
lexer::Token,
|
||||
token_stream::{Error, TokenStream},
|
||||
};
|
||||
|
||||
pub trait Parse
|
||||
where
|
||||
Self: std::marker::Sized,
|
||||
{
|
||||
fn parse(stream: TokenStream) -> Result<Self, Error>;
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Type {
|
||||
I32,
|
||||
}
|
||||
|
||||
impl Parse for Type {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, Error> {
|
||||
if let Some(Token::Identifier(ident)) = stream.next() {
|
||||
Ok(match &*ident {
|
||||
"i32" => Type::I32,
|
||||
_ => panic!("asd"),
|
||||
})
|
||||
} else {
|
||||
Err(stream.expected_err("type identifier")?)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Literal {
|
||||
I32(i32),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Expression {
|
||||
VariableName(String),
|
||||
Literal(Literal),
|
||||
Binop(BinaryOperator, Box<Expression>, Box<Expression>),
|
||||
FunctionCall(Box<FunctionCallExpression>),
|
||||
BlockExpr(Box<Block>),
|
||||
IfExpr(Box<IfExpression>),
|
||||
}
|
||||
|
||||
impl Parse for Expression {
|
||||
fn parse(mut stream: TokenStream) -> Result<Expression, Error> {
|
||||
let lhs = parse_primary_expression(&mut stream)?;
|
||||
parse_binop_rhs(&mut stream, lhs, None)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_primary_expression(stream: &mut TokenStream) -> Result<Expression, Error> {
|
||||
if let Ok(exp) = stream.parse() {
|
||||
Ok(Expression::FunctionCall(Box::new(exp)))
|
||||
} else if let Ok(block) = stream.parse() {
|
||||
Ok(Expression::BlockExpr(Box::new(block)))
|
||||
} else if let Ok(ifexpr) = stream.parse() {
|
||||
Ok(Expression::IfExpr(Box::new(ifexpr)))
|
||||
} else if let Some(token) = stream.next() {
|
||||
Ok(match &token {
|
||||
Token::Identifier(v) => Expression::VariableName(v.clone()),
|
||||
Token::DecimalValue(v) => Expression::Literal(Literal::I32(v.parse().unwrap())),
|
||||
Token::ParenOpen => {
|
||||
let exp = stream.parse()?;
|
||||
stream.expect(Token::ParenClose)?;
|
||||
exp
|
||||
}
|
||||
_ => Err(stream.expected_err("identifier, constant or parentheses")?)?,
|
||||
})
|
||||
} else {
|
||||
Err(stream.expected_err("expression")?)?
|
||||
}
|
||||
}
|
||||
|
||||
/// This algorithm seems somewhat like magic to me. I understand it if I read
|
||||
/// carefully, but it is difficult to read every single time.
|
||||
///
|
||||
/// Reference for how the algorithm is formed:
|
||||
/// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl02.html#binary-expression-parsing
|
||||
fn parse_binop_rhs(
|
||||
stream: &mut TokenStream,
|
||||
mut lhs: Expression,
|
||||
mut operator: Option<BinaryOperator>,
|
||||
) -> Result<Expression, Error> {
|
||||
let expr_prec = if let Some(op) = operator {
|
||||
op.get_precedence() + 1
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
while let Some(op) = operator.take().as_ref().or(stream.parse().as_ref().ok()) {
|
||||
let curr_token_prec = op.get_precedence();
|
||||
|
||||
if curr_token_prec < expr_prec {
|
||||
break; // Just return lhs
|
||||
} else {
|
||||
let mut rhs = parse_primary_expression(stream)?;
|
||||
if let Ok(next_op) = stream.parse::<BinaryOperator>() {
|
||||
let next_prec = next_op.get_precedence();
|
||||
if curr_token_prec < next_prec {
|
||||
// Operator on the right of rhs has more precedence, turn
|
||||
// rhs into lhs for new binop
|
||||
rhs = parse_binop_rhs(stream, rhs, Some(next_op))?;
|
||||
} else {
|
||||
let _ = operator.insert(next_op);
|
||||
}
|
||||
}
|
||||
|
||||
lhs = Expression::Binop(*op, Box::new(lhs), Box::new(rhs));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(lhs)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum BinaryOperator {
|
||||
Add,
|
||||
Minus,
|
||||
Mult,
|
||||
|
||||
And,
|
||||
LessThan,
|
||||
}
|
||||
|
||||
impl Parse for BinaryOperator {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, Error> {
|
||||
Ok(match (stream.next(), stream.peek()) {
|
||||
(Some(Token::Et), Some(Token::Et)) => {
|
||||
stream.next();
|
||||
BinaryOperator::And
|
||||
}
|
||||
(Some(Token::LessThan), _) => BinaryOperator::LessThan,
|
||||
|
||||
(Some(Token::Plus), _) => BinaryOperator::Add,
|
||||
(Some(Token::Minus), _) => BinaryOperator::Minus,
|
||||
(Some(Token::Times), _) => BinaryOperator::Mult,
|
||||
(_, _) => Err(stream.expected_err("expected operator")?)?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl BinaryOperator {
|
||||
pub fn get_precedence(&self) -> i8 {
|
||||
use BinaryOperator::*;
|
||||
match &self {
|
||||
Add => 10,
|
||||
Minus => 10,
|
||||
Mult => 20,
|
||||
And => 100,
|
||||
LessThan => 100,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FunctionCallExpression(pub String, pub Vec<Expression>);
|
||||
|
||||
impl Parse for FunctionCallExpression {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, Error> {
|
||||
if let Some(Token::Identifier(name)) = stream.next() {
|
||||
stream.expect(Token::ParenOpen)?;
|
||||
|
||||
let mut args = Vec::new();
|
||||
|
||||
if let Ok(exp) = stream.parse() {
|
||||
args.push(exp);
|
||||
|
||||
while stream.expect(Token::Comma).is_ok() {
|
||||
args.push(stream.parse()?);
|
||||
}
|
||||
}
|
||||
|
||||
stream.expect(Token::ParenClose)?;
|
||||
|
||||
Ok(FunctionCallExpression(name, args))
|
||||
} else {
|
||||
Err(stream.expected_err("identifier")?)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IfExpression(Expression, pub Block);
|
||||
|
||||
impl Parse for IfExpression {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, Error> {
|
||||
stream.expect(Token::If)?;
|
||||
Ok(IfExpression(stream.parse()?, stream.parse()?))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LetStatement(pub String, pub Expression);
|
||||
|
||||
impl Parse for LetStatement {
|
||||
fn parse(mut stream: TokenStream) -> Result<LetStatement, Error> {
|
||||
stream.expect(Token::LetKeyword)?;
|
||||
|
||||
if let Some(Token::Identifier(variable)) = stream.next() {
|
||||
stream.expect(Token::Equals)?;
|
||||
|
||||
let expression = stream.parse()?;
|
||||
stream.expect(Token::Semi)?;
|
||||
Ok(LetStatement(variable, expression))
|
||||
} else {
|
||||
Err(stream.expected_err("identifier")?)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ImportStatement(Vec<String>);
|
||||
|
||||
impl Parse for ImportStatement {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, Error> {
|
||||
stream.expect(Token::ImportKeyword)?;
|
||||
|
||||
let mut import_list = Vec::new();
|
||||
|
||||
if let Some(Token::Identifier(name)) = stream.next() {
|
||||
import_list.push(name);
|
||||
while stream.expect(Token::Colon).is_ok() && stream.expect(Token::Colon).is_ok() {
|
||||
if let Some(Token::Identifier(name)) = stream.next() {
|
||||
import_list.push(name);
|
||||
} else {
|
||||
Err(stream.expected_err("identifier")?)?
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Err(stream.expected_err("identifier")?)?
|
||||
}
|
||||
|
||||
stream.expect(Token::Semi)?;
|
||||
|
||||
Ok(ImportStatement(import_list))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct FunctionDefinition(pub FunctionSignature, pub Block);
|
||||
|
||||
impl Parse for FunctionDefinition {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, Error> {
|
||||
stream.expect(Token::FnKeyword)?;
|
||||
Ok(FunctionDefinition(stream.parse()?, stream.parse()?))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FunctionSignature {
|
||||
pub name: String,
|
||||
pub args: Vec<(String, Type)>,
|
||||
pub return_type: Option<Type>,
|
||||
}
|
||||
|
||||
impl Parse for FunctionSignature {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, Error> {
|
||||
if let Some(Token::Identifier(name)) = stream.next() {
|
||||
stream.expect(Token::ParenOpen)?;
|
||||
let mut args = Vec::new();
|
||||
|
||||
while let Some(Token::Identifier(arg_name)) = stream.peek() {
|
||||
stream.next();
|
||||
stream.expect(Token::Colon)?;
|
||||
args.push((arg_name, stream.parse()?));
|
||||
}
|
||||
|
||||
stream.expect(Token::ParenClose)?;
|
||||
|
||||
let mut return_type = None;
|
||||
if stream.expect(Token::Arrow).is_ok() {
|
||||
return_type = Some(stream.parse()?);
|
||||
}
|
||||
|
||||
Ok(FunctionSignature {
|
||||
name,
|
||||
args,
|
||||
return_type,
|
||||
})
|
||||
} else {
|
||||
Err(stream.expected_err("identifier")?)?
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum ReturnType {
|
||||
Soft,
|
||||
Hard,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Block(
|
||||
pub Vec<BlockLevelStatement>,
|
||||
pub Option<(ReturnType, Expression)>,
|
||||
);
|
||||
|
||||
impl Parse for Block {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, Error> {
|
||||
let mut statements = Vec::new();
|
||||
let mut return_stmt = None;
|
||||
stream.expect(Token::BraceOpen)?;
|
||||
|
||||
while !matches!(stream.peek(), Some(Token::BraceClose)) {
|
||||
if let Some((r_type, e)) = return_stmt.take() {
|
||||
// Special list of expressions that are simply not warned about,
|
||||
// if semicolon is missing.
|
||||
if !matches!(&e, &Expression::IfExpr(_)) {
|
||||
dbg!(r_type, &e);
|
||||
println!("Oh no, does this statement lack ;");
|
||||
}
|
||||
|
||||
statements.push(BlockLevelStatement::Expression(e));
|
||||
}
|
||||
let statement = stream.parse()?;
|
||||
if let BlockLevelStatement::Return((r_type, e)) = &statement {
|
||||
match r_type {
|
||||
ReturnType::Hard => {
|
||||
return_stmt = Some((*r_type, e.clone()));
|
||||
break; // Return has to be the last statement
|
||||
// TODO: Make a mechanism that "can" parse even after this
|
||||
}
|
||||
ReturnType::Soft => {
|
||||
return_stmt = Some((*r_type, e.clone()));
|
||||
continue; // In theory possible to have lines after a soft return
|
||||
}
|
||||
};
|
||||
}
|
||||
statements.push(statement);
|
||||
}
|
||||
stream.expect(Token::BraceClose)?;
|
||||
Ok(Block(statements, return_stmt))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum BlockLevelStatement {
|
||||
Let(LetStatement),
|
||||
Import(ImportStatement),
|
||||
Expression(Expression),
|
||||
Return((ReturnType, Expression)),
|
||||
}
|
||||
|
||||
impl Parse for BlockLevelStatement {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, Error> {
|
||||
use BlockLevelStatement as Stmt;
|
||||
Ok(match stream.peek() {
|
||||
Some(Token::LetKeyword) => Stmt::Let(stream.parse()?),
|
||||
Some(Token::ImportKeyword) => Stmt::Import(stream.parse()?),
|
||||
Some(Token::ReturnKeyword) => {
|
||||
stream.next();
|
||||
let exp = stream.parse()?;
|
||||
stream.expect(Token::Semi)?;
|
||||
Stmt::Return((ReturnType::Hard, exp))
|
||||
}
|
||||
_ => {
|
||||
if let Ok(e) = stream.parse() {
|
||||
if stream.expect(Token::Semi).is_ok() {
|
||||
Stmt::Expression(e)
|
||||
} else {
|
||||
Stmt::Return((ReturnType::Soft, e))
|
||||
}
|
||||
} else {
|
||||
Err(stream.expected_err("expression")?)?
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum TopLevelStatement {
|
||||
Import(ImportStatement),
|
||||
FunctionDefinition(FunctionDefinition),
|
||||
}
|
||||
|
||||
impl Parse for TopLevelStatement {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, Error> {
|
||||
use TopLevelStatement as Stmt;
|
||||
Ok(match stream.peek() {
|
||||
Some(Token::ImportKeyword) => Stmt::Import(stream.parse()?),
|
||||
Some(Token::FnKeyword) => Stmt::FunctionDefinition(stream.parse()?),
|
||||
_ => Err(stream.expected_err("import or fn")?)?,
|
||||
})
|
||||
}
|
||||
}
|
222
src/codegen.rs
Normal file
222
src/codegen.rs
Normal file
@ -0,0 +1,222 @@
|
||||
use std::collections::{hash_map, HashMap};
|
||||
|
||||
use crate::{
|
||||
ast::{
|
||||
BinaryOperator, Block, BlockLevelStatement, Expression, FunctionCallExpression,
|
||||
FunctionDefinition, FunctionSignature, ReturnType, TopLevelStatement,
|
||||
},
|
||||
llvm_ir::{self, IRBlock, IRFunction, IRModule, IRValue, IRValueType},
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ScopeData {
|
||||
named_vars: HashMap<String, IRValue>,
|
||||
defined_functions: HashMap<String, (FunctionSignature, Option<IRFunction>)>,
|
||||
}
|
||||
|
||||
impl ScopeData {
|
||||
pub fn inner<'a, 'b>(&self, block: &'b mut IRBlock<'a>) -> Scope<'a, 'b> {
|
||||
Scope {
|
||||
block,
|
||||
data: self.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn var(&self, name: &String) -> Option<&IRValue> {
|
||||
self.named_vars.get(name)
|
||||
}
|
||||
|
||||
pub fn set_var(&mut self, name: &str, val: IRValue) -> Result<(), Error> {
|
||||
if let hash_map::Entry::Vacant(e) = self.named_vars.entry(name.to_owned()) {
|
||||
e.insert(val);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(Error::VariableAlreadyDefined(name.to_owned()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn function(
|
||||
&mut self,
|
||||
name: &String,
|
||||
) -> Option<&mut (FunctionSignature, Option<IRFunction>)> {
|
||||
self.defined_functions.get_mut(name)
|
||||
}
|
||||
|
||||
pub fn set_function_signature(
|
||||
&mut self,
|
||||
name: &str,
|
||||
sig: FunctionSignature,
|
||||
ir: IRFunction,
|
||||
) -> Result<(), Error> {
|
||||
if let hash_map::Entry::Vacant(e) = self.defined_functions.entry(name.to_owned()) {
|
||||
e.insert((sig, Some(ir)));
|
||||
Ok(())
|
||||
} else {
|
||||
Err(Error::VariableAlreadyDefined(name.to_owned()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Scope<'a, 'b> {
|
||||
pub block: &'b mut IRBlock<'a>,
|
||||
pub data: ScopeData,
|
||||
}
|
||||
|
||||
impl<'a, 'b> Scope<'a, 'b> {
|
||||
pub fn inner<'c>(&'c mut self) -> Scope<'a, 'c> {
|
||||
Scope {
|
||||
block: self.block,
|
||||
data: self.data.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn codegen_from_statements(statements: Vec<TopLevelStatement>) -> Result<IRModule, Error> {
|
||||
let mut module = IRModule::new("testmod");
|
||||
|
||||
let mut scope = ScopeData {
|
||||
defined_functions: HashMap::new(),
|
||||
named_vars: HashMap::new(),
|
||||
};
|
||||
|
||||
for statement in &statements {
|
||||
match statement {
|
||||
TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, _)) => {
|
||||
let function = module.create_func(&sig.name, IRValueType::I32);
|
||||
scope.set_function_signature(&sig.name.clone(), sig.clone(), function)?;
|
||||
}
|
||||
TopLevelStatement::Import(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
for statement in &statements {
|
||||
statement.codegen(&mut module, &mut scope)?;
|
||||
}
|
||||
|
||||
Ok(module)
|
||||
}
|
||||
|
||||
impl TopLevelStatement {
|
||||
pub fn codegen(&self, module: &mut IRModule, root_data: &mut ScopeData) -> Result<(), Error> {
|
||||
match self {
|
||||
TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, block)) => {
|
||||
if let Some((_, ir)) = root_data.function(&sig.name) {
|
||||
if let Some(ir_function) = ir.take() {
|
||||
let mut ir_block = module.create_block();
|
||||
let mut scope = root_data.inner(&mut ir_block);
|
||||
|
||||
let (_, value) = match block.codegen(&mut scope)? {
|
||||
Some(v) => v,
|
||||
None => panic!("Void-return type function not yet implemented!"),
|
||||
};
|
||||
|
||||
ir_function.add_definition(value, ir_block);
|
||||
} else {
|
||||
Err(Error::FunctionAlreadyDefined(sig.name.clone()))?
|
||||
}
|
||||
} else {
|
||||
panic!("Function was not declared before it's definition")
|
||||
}
|
||||
}
|
||||
TopLevelStatement::Import(_) => {}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Block {
|
||||
pub fn codegen(&self, scope: &mut Scope) -> Result<Option<(ReturnType, IRValue)>, Error> {
|
||||
for statement in &self.0 {
|
||||
statement.codegen(scope)?;
|
||||
}
|
||||
|
||||
let value = if let Some((rt, exp)) = &self.1 {
|
||||
Some((*rt, exp.codegen(scope)?))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl BlockLevelStatement {
|
||||
pub fn codegen(&self, scope: &mut Scope) -> Result<(), Error> {
|
||||
match self {
|
||||
BlockLevelStatement::Let(let_statement) => {
|
||||
let val = let_statement.1.codegen(scope)?;
|
||||
scope.data.set_var(&let_statement.0, val)?;
|
||||
Ok(())
|
||||
}
|
||||
BlockLevelStatement::Return(_) => panic!("Should never happen"),
|
||||
BlockLevelStatement::Import(_) => Ok(()), // TODO: To implement
|
||||
BlockLevelStatement::Expression(e) => {
|
||||
let _value = e.codegen(scope)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Expression {
|
||||
pub fn codegen(&self, scope: &mut Scope) -> Result<IRValue, Error> {
|
||||
use Expression::*;
|
||||
match self {
|
||||
Binop(op, lhs, rhs) => match op {
|
||||
BinaryOperator::Add => {
|
||||
let lhs = lhs.codegen(scope)?;
|
||||
let rhs = rhs.codegen(scope)?;
|
||||
Ok(scope.block.add(lhs, rhs)?)
|
||||
}
|
||||
BinaryOperator::Mult => {
|
||||
let lhs = lhs.codegen(scope)?;
|
||||
let rhs = rhs.codegen(scope)?;
|
||||
Ok(scope.block.mul(lhs, rhs)?)
|
||||
}
|
||||
_ => panic!("Other binary operators not supported yet!"),
|
||||
},
|
||||
BlockExpr(block) => {
|
||||
let mut inner = scope.inner();
|
||||
|
||||
Ok(match block.codegen(&mut inner)? {
|
||||
Some((r_type, value)) => match r_type {
|
||||
ReturnType::Soft => value,
|
||||
ReturnType::Hard => {
|
||||
panic!("Hard returns in inner blocks not supported yet")
|
||||
}
|
||||
},
|
||||
None => panic!("Void-return type block not yet implemented!"),
|
||||
})
|
||||
}
|
||||
FunctionCall(fc) => {
|
||||
let FunctionCallExpression(name, _) = &**fc;
|
||||
if let Some((sig, _)) = scope.data.function(name) {
|
||||
Ok(scope.block.function_call(sig)?)
|
||||
} else {
|
||||
Err(Error::UndefinedFunction(name.clone()))?
|
||||
}
|
||||
}
|
||||
VariableName(name) => scope
|
||||
.data
|
||||
.var(name)
|
||||
.cloned()
|
||||
.ok_or(Error::UndefinedVariable(name.clone())),
|
||||
Literal(lit) => Ok(scope.block.get_const(lit)),
|
||||
IfExpr(_) => panic!("if expressions not yet supported"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum Error {
|
||||
#[error("Variable '{0}' already defined")]
|
||||
VariableAlreadyDefined(String),
|
||||
#[error("Variable '{0}' not yet defined")]
|
||||
UndefinedVariable(String),
|
||||
#[error("Function '{0}' not defined")]
|
||||
UndefinedFunction(String),
|
||||
#[error("Function '{0}' already defined")]
|
||||
FunctionAlreadyDefined(String),
|
||||
#[error(transparent)]
|
||||
Deeper(#[from] llvm_ir::Error),
|
||||
}
|
256
src/lexer.rs
256
src/lexer.rs
@ -1,99 +1,7 @@
|
||||
use std::{fmt::Debug, iter::Peekable, str::Chars};
|
||||
use std::{fmt::Debug, str::Chars};
|
||||
|
||||
static DECIMAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
|
||||
|
||||
pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, String> {
|
||||
let to_tokenize = to_tokenize.into();
|
||||
let mut position = (0, 1);
|
||||
let mut cursor = Cursor {
|
||||
char_stream: to_tokenize.chars().peekable(),
|
||||
position,
|
||||
};
|
||||
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
while let Some(character) = &cursor.next() {
|
||||
position.0 += 1;
|
||||
if *character == '\n' {
|
||||
position.1 += 1;
|
||||
position.0 = 0;
|
||||
}
|
||||
|
||||
let peek = cursor.peek();
|
||||
|
||||
let variant = match character {
|
||||
// Whitespace
|
||||
w if w.is_whitespace() => continue,
|
||||
// Comments
|
||||
'/' if peek == Some(&'/') => {
|
||||
while !matches!(&cursor.peek(), Some('\n')) {
|
||||
cursor.next();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// "words"
|
||||
c if c.is_alphabetic() => {
|
||||
let mut value = character.to_string();
|
||||
while let Some(c) = &cursor.peek() {
|
||||
if !c.is_ascii_alphanumeric() {
|
||||
break;
|
||||
}
|
||||
value += &c.to_string();
|
||||
cursor.next();
|
||||
}
|
||||
|
||||
// Check for keywords
|
||||
let variant = match value.as_str() {
|
||||
"let" => Token::LetKeyword,
|
||||
"import" => Token::ImportKeyword,
|
||||
_ => Token::Identifier(value),
|
||||
};
|
||||
variant
|
||||
}
|
||||
// Decimals
|
||||
c if DECIMAL_NUMERICS.contains(c) => {
|
||||
let mut value = character.to_string();
|
||||
while let Some(c) = &cursor.peek() {
|
||||
if !DECIMAL_NUMERICS.contains(c) {
|
||||
break;
|
||||
}
|
||||
value += &c.to_string();
|
||||
cursor.next();
|
||||
}
|
||||
Token::DecimalValue(value)
|
||||
}
|
||||
// Single character tokens
|
||||
'=' => Token::Equals,
|
||||
';' => Token::Semicolon,
|
||||
':' => Token::Colon,
|
||||
'+' => Token::Plus,
|
||||
'*' => Token::Times,
|
||||
'(' => Token::ParenOpen,
|
||||
')' => Token::ParenClose,
|
||||
',' => Token::Comma,
|
||||
// Invalid token
|
||||
_ => Err(format!(
|
||||
"Unknown token '{}' at {}, {}",
|
||||
character, position.0, position.1
|
||||
))?,
|
||||
};
|
||||
|
||||
tokens.push(FullToken {
|
||||
token: variant,
|
||||
position,
|
||||
});
|
||||
}
|
||||
|
||||
position.0 += 1;
|
||||
|
||||
tokens.push(FullToken {
|
||||
token: Token::Eof,
|
||||
position,
|
||||
});
|
||||
|
||||
Ok(tokens)
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq, Clone)]
|
||||
pub enum Token {
|
||||
// Values
|
||||
@ -102,17 +10,49 @@ pub enum Token {
|
||||
DecimalValue(String),
|
||||
|
||||
// Keywords
|
||||
/// `let`
|
||||
LetKeyword,
|
||||
/// `import`
|
||||
ImportKeyword,
|
||||
/// `return`
|
||||
ReturnKeyword,
|
||||
/// `fn`
|
||||
FnKeyword,
|
||||
/// `->`
|
||||
Arrow,
|
||||
/// `if`
|
||||
If,
|
||||
|
||||
// Symbols
|
||||
Semicolon,
|
||||
/// `;`
|
||||
Semi,
|
||||
/// `=`
|
||||
Equals,
|
||||
/// `:`
|
||||
Colon,
|
||||
/// `+`
|
||||
Plus,
|
||||
/// `*`
|
||||
Times,
|
||||
ParenOpen, // (
|
||||
ParenClose, // )
|
||||
/// `-`
|
||||
Minus,
|
||||
|
||||
/// `>`
|
||||
GreaterThan,
|
||||
/// `<`
|
||||
LessThan,
|
||||
/// `&`
|
||||
Et,
|
||||
|
||||
/// `(`
|
||||
ParenOpen,
|
||||
/// `)`
|
||||
ParenClose,
|
||||
/// `{`
|
||||
BraceOpen,
|
||||
/// `}`
|
||||
BraceClose,
|
||||
/// `,`
|
||||
Comma,
|
||||
|
||||
Eof,
|
||||
@ -122,15 +62,23 @@ impl Token {
|
||||
pub fn get_token_prec(&self) -> i8 {
|
||||
match &self {
|
||||
Token::Plus => 10,
|
||||
Token::Minus => 10,
|
||||
Token::Times => 20,
|
||||
_ => -1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Renders a token as text by reusing its derived `Debug` representation.
impl From<Token> for String {
    fn from(token: Token) -> String {
        format!("{:?}", token)
    }
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct FullToken {
|
||||
pub token: Token,
|
||||
position: Position,
|
||||
pub position: Position,
|
||||
}
|
||||
|
||||
impl Debug for FullToken {
|
||||
@ -146,21 +94,127 @@ pub type Position = (u32, u32);
|
||||
|
||||
pub struct Cursor<'a> {
|
||||
pub position: Position,
|
||||
char_stream: Peekable<Chars<'a>>,
|
||||
char_stream: Chars<'a>,
|
||||
}
|
||||
|
||||
impl<'a> Cursor<'a> {
|
||||
fn next(&mut self) -> Option<char> {
|
||||
let next = self.char_stream.next();
|
||||
self.position.0 += 1;
|
||||
if let Some('\n') = next {
|
||||
self.position.1 += 1;
|
||||
self.position.0 = 0;
|
||||
}
|
||||
self.position.0 += 1;
|
||||
next
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Option<&char> {
|
||||
self.char_stream.peek()
|
||||
fn first(&mut self) -> Option<char> {
|
||||
// `.next()` optimizes better than `.nth(0)`
|
||||
self.char_stream.clone().next()
|
||||
}
|
||||
|
||||
#[allow(dead_code)] // Is this actually needed?
|
||||
fn second(&mut self) -> Option<char> {
|
||||
// `.next()` optimizes better than `.nth(1)`
|
||||
let mut stream = self.char_stream.clone();
|
||||
stream.next();
|
||||
stream.next()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Error> {
|
||||
let to_tokenize = to_tokenize.into();
|
||||
let mut cursor = Cursor {
|
||||
char_stream: to_tokenize.chars(),
|
||||
position: (0, 1),
|
||||
};
|
||||
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
while let Some(character) = &cursor.next() {
|
||||
let variant = match character {
|
||||
// Whitespace
|
||||
w if w.is_whitespace() => continue,
|
||||
// Comments
|
||||
'/' if cursor.first() == Some('/') => {
|
||||
while !matches!(cursor.first(), Some('\n') | None) {
|
||||
cursor.next();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// "words"
|
||||
c if c.is_alphabetic() => {
|
||||
let mut value = character.to_string();
|
||||
while let Some(c) = cursor.first() {
|
||||
if !c.is_ascii_alphanumeric() {
|
||||
break;
|
||||
}
|
||||
value += &c.to_string();
|
||||
cursor.next();
|
||||
}
|
||||
|
||||
// Check for keywords
|
||||
let variant = match value.as_str() {
|
||||
"let" => Token::LetKeyword,
|
||||
"import" => Token::ImportKeyword,
|
||||
"return" => Token::ReturnKeyword,
|
||||
"fn" => Token::FnKeyword,
|
||||
"if" => Token::If,
|
||||
_ => Token::Identifier(value),
|
||||
};
|
||||
variant
|
||||
}
|
||||
// Decimals
|
||||
c if DECIMAL_NUMERICS.contains(c) => {
|
||||
let mut value = character.to_string();
|
||||
while let Some(c) = cursor.first() {
|
||||
if !DECIMAL_NUMERICS.contains(&c) {
|
||||
break;
|
||||
}
|
||||
value += &c.to_string();
|
||||
cursor.next();
|
||||
}
|
||||
Token::DecimalValue(value)
|
||||
}
|
||||
'-' if cursor.first() == Some('>') => {
|
||||
cursor.next(); // Eat `>`
|
||||
Token::Arrow
|
||||
}
|
||||
// Single character tokens
|
||||
'=' => Token::Equals,
|
||||
';' => Token::Semi,
|
||||
':' => Token::Colon,
|
||||
'+' => Token::Plus,
|
||||
'*' => Token::Times,
|
||||
'-' => Token::Minus,
|
||||
'>' => Token::GreaterThan,
|
||||
'<' => Token::LessThan,
|
||||
'&' => Token::Et,
|
||||
'(' => Token::ParenOpen,
|
||||
')' => Token::ParenClose,
|
||||
'{' => Token::BraceOpen,
|
||||
'}' => Token::BraceClose,
|
||||
',' => Token::Comma,
|
||||
// Invalid token
|
||||
_ => Err(Error::InvalidToken(*character, cursor.position))?,
|
||||
};
|
||||
|
||||
tokens.push(FullToken {
|
||||
token: variant,
|
||||
position: cursor.position,
|
||||
});
|
||||
}
|
||||
|
||||
tokens.push(FullToken {
|
||||
token: Token::Eof,
|
||||
position: cursor.position,
|
||||
});
|
||||
|
||||
Ok(tokens)
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum Error {
|
||||
#[error("Invalid token '{}' at Ln {}, Col {}", .0, (.1).1, (.1).0)]
|
||||
InvalidToken(char, Position),
|
||||
}
|
||||
|
46
src/lib.rs
Normal file
46
src/lib.rs
Normal file
@ -0,0 +1,46 @@
|
||||
use crate::{
|
||||
ast::TopLevelStatement, codegen::codegen_from_statements, lexer::Token,
|
||||
token_stream::TokenStream,
|
||||
};
|
||||
|
||||
mod ast;
|
||||
mod codegen;
|
||||
mod lexer;
|
||||
mod llvm_ir;
|
||||
mod token_stream;
|
||||
|
||||
// TODO:
|
||||
// 1. Make it so that TopLevelStatement can only be import or function def
|
||||
// 2. Make BlockLevelStatement, that has everything TopLevelStatement has now
|
||||
// 3. Make it so all codegen is done with a Block-struct, that represents a
|
||||
// single proper block
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum ReidError {
|
||||
#[error(transparent)]
|
||||
LexerError(#[from] lexer::Error),
|
||||
#[error(transparent)]
|
||||
ParserError(#[from] token_stream::Error),
|
||||
#[error(transparent)]
|
||||
CodegenError(#[from] codegen::Error),
|
||||
}
|
||||
|
||||
pub fn compile(source: &str) -> Result<String, ReidError> {
|
||||
let tokens = lexer::tokenize(source)?;
|
||||
|
||||
dbg!(&tokens);
|
||||
|
||||
let mut token_stream = TokenStream::from(&tokens);
|
||||
|
||||
let mut statements = Vec::new();
|
||||
|
||||
while !matches!(token_stream.peek().unwrap_or(Token::Eof), Token::Eof) {
|
||||
let statement = token_stream.parse::<TopLevelStatement>()?;
|
||||
dbg!(&statement);
|
||||
statements.push(statement);
|
||||
}
|
||||
|
||||
let mut module = codegen_from_statements(statements)?;
|
||||
let text = module.print_to_string().unwrap();
|
||||
Ok(text.to_owned())
|
||||
}
|
203
src/llvm_ir.rs
Normal file
203
src/llvm_ir.rs
Normal file
@ -0,0 +1,203 @@
|
||||
use std::ffi::{CStr, CString};
|
||||
use std::mem;
|
||||
|
||||
use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule};
|
||||
|
||||
use crate::ast::{FunctionSignature, Literal};
|
||||
|
||||
/// Expands to a raw pointer to a NUL-terminated copy of a string literal.
///
/// The expansion calls `CStr::from_bytes_with_nul_unchecked`, so it has to be
/// used inside an `unsafe` context and the literal must not contain interior
/// NUL bytes.
macro_rules! cstr {
    ($text:expr) => {
        core::ffi::CStr::from_bytes_with_nul_unchecked(concat!($text, "\0").as_bytes()).as_ptr()
    };
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[must_use = "value contains raw pointer and must be inserted somewhere"]
|
||||
pub struct IRValue(IRValueType, LLVMValueRef);
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub enum IRValueType {
|
||||
I32,
|
||||
}
|
||||
|
||||
impl IRValueType {
|
||||
unsafe fn get_llvm_type(&self, module: &mut IRModule) -> LLVMTypeRef {
|
||||
match *self {
|
||||
Self::I32 => LLVMInt32TypeInContext(module.context),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts any string-like value into an owned, NUL-terminated [`CString`].
///
/// If the input contains an interior NUL byte, the result is truncated at the
/// first one. That is the text C code would see through the pointer anyway,
/// and it keeps the `CString` invariant (no interior NULs) intact instead of
/// bypassing it with an unchecked constructor.
fn into_cstring<T: Into<String>>(value: T) -> CString {
    let mut bytes = value.into().into_bytes();
    if let Some(first_nul) = bytes.iter().position(|&byte| byte == 0) {
        bytes.truncate(first_nul);
    }
    CString::new(bytes).expect("interior NUL bytes were truncated above")
}
|
||||
|
||||
pub struct IRModule {
|
||||
context: *mut LLVMContext,
|
||||
module: *mut LLVMModule,
|
||||
builder: *mut LLVMBuilder,
|
||||
}
|
||||
|
||||
impl IRModule {
|
||||
pub fn new<T: Into<String>>(name: T) -> IRModule {
|
||||
unsafe {
|
||||
// Set up a context, module and builder in that context.
|
||||
let context = LLVMContextCreate();
|
||||
let module = LLVMModuleCreateWithNameInContext(into_cstring(name).as_ptr(), context);
|
||||
let builder = LLVMCreateBuilderInContext(context);
|
||||
|
||||
IRModule {
|
||||
context,
|
||||
module,
|
||||
builder,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_block(&mut self) -> IRBlock {
|
||||
IRBlock::create("entry", self)
|
||||
}
|
||||
|
||||
pub fn create_func<T: Into<String>>(
|
||||
&mut self,
|
||||
name: T,
|
||||
return_type: IRValueType,
|
||||
) -> IRFunction {
|
||||
unsafe {
|
||||
let mut argts = [];
|
||||
let func_type = LLVMFunctionType(
|
||||
return_type.get_llvm_type(self),
|
||||
argts.as_mut_ptr(),
|
||||
argts.len() as u32,
|
||||
0,
|
||||
);
|
||||
|
||||
let anon_func = LLVMAddFunction(self.module, into_cstring(name).as_ptr(), func_type);
|
||||
IRFunction {
|
||||
value: IRValue(return_type, anon_func),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn print_to_string(&mut self) -> Result<&str, std::str::Utf8Error> {
|
||||
unsafe { CStr::from_ptr(LLVMPrintModuleToString(self.module)).to_str() }
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for IRModule {
|
||||
fn drop(&mut self) {
|
||||
// Clean up. Values created in the context mostly get cleaned up there.
|
||||
unsafe {
|
||||
LLVMDisposeBuilder(self.builder);
|
||||
LLVMDisposeModule(self.module);
|
||||
LLVMContextDispose(self.context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct IRFunction {
|
||||
value: IRValue,
|
||||
}
|
||||
|
||||
impl IRFunction {
|
||||
pub fn add_definition(self, ret: IRValue, block: IRBlock) {
|
||||
unsafe {
|
||||
LLVMAppendExistingBasicBlock(self.value.1, block.blockref);
|
||||
LLVMBuildRet(block.module.builder, ret.1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct IRBlock<'a> {
|
||||
module: &'a mut IRModule,
|
||||
blockref: LLVMBasicBlockRef,
|
||||
}
|
||||
|
||||
impl<'a> IRBlock<'a> {
|
||||
fn create<T: Into<String>>(name: T, codegen: &'a mut IRModule) -> IRBlock<'a> {
|
||||
unsafe {
|
||||
let blockref =
|
||||
LLVMCreateBasicBlockInContext(codegen.context, into_cstring(name).as_ptr());
|
||||
LLVMPositionBuilderAtEnd(codegen.builder, blockref);
|
||||
IRBlock {
|
||||
module: codegen,
|
||||
blockref,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_const(&mut self, literal_type: &Literal) -> IRValue {
|
||||
unsafe {
|
||||
match *literal_type {
|
||||
Literal::I32(v) => IRValue(
|
||||
IRValueType::I32,
|
||||
LLVMConstInt(
|
||||
LLVMInt32TypeInContext(self.module.context),
|
||||
mem::transmute(v as i64),
|
||||
1,
|
||||
),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add(&mut self, lhs: IRValue, rhs: IRValue) -> Result<IRValue, Error> {
|
||||
unsafe {
|
||||
if lhs.0 == rhs.0 {
|
||||
Ok(IRValue(
|
||||
lhs.0,
|
||||
LLVMBuildAdd(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")),
|
||||
))
|
||||
} else {
|
||||
Err(Error::TypeMismatch(lhs.0, rhs.0))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mul(&mut self, lhs: IRValue, rhs: IRValue) -> Result<IRValue, Error> {
|
||||
unsafe {
|
||||
if lhs.0 == rhs.0 {
|
||||
Ok(IRValue(
|
||||
lhs.0,
|
||||
LLVMBuildMul(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")),
|
||||
))
|
||||
} else {
|
||||
Err(Error::TypeMismatch(lhs.0, rhs.0))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn function_call(&mut self, callee: &FunctionSignature) -> Result<IRValue, Error> {
|
||||
unsafe {
|
||||
let function = LLVMGetNamedFunction(
|
||||
self.module.module,
|
||||
into_cstring(callee.name.clone()).as_ptr(),
|
||||
);
|
||||
|
||||
let ret_t = LLVMInt32TypeInContext(self.module.context);
|
||||
let mut argts = [];
|
||||
let mut args = [];
|
||||
|
||||
let fun_t = LLVMFunctionType(ret_t, argts.as_mut_ptr(), argts.len() as u32, 0);
|
||||
|
||||
let call = LLVMBuildCall2(
|
||||
self.module.builder,
|
||||
fun_t,
|
||||
function,
|
||||
args.as_mut_ptr(),
|
||||
args.len() as u32,
|
||||
into_cstring(&callee.name).as_ptr(),
|
||||
);
|
||||
|
||||
Ok(IRValue(IRValueType::I32, call))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum Error {
|
||||
#[error("Type Mismatch: {0:?} {1:?}")]
|
||||
TypeMismatch(IRValueType, IRValueType),
|
||||
}
|
24
src/main.rs
24
src/main.rs
@ -1,24 +0,0 @@
|
||||
use crate::{lexer::Token, parser::TopLevelStatement, token_stream::TokenStream};
|
||||
|
||||
pub static EASIEST: &str = include_str!("../easiest.reid");
|
||||
pub static EASY: &str = include_str!("../easy.reid");
|
||||
pub static MEDIUM: &str = include_str!("../medium.reid");
|
||||
pub static HARD: &str = include_str!("../hard.reid");
|
||||
|
||||
mod lexer;
|
||||
mod parser;
|
||||
mod token_stream;
|
||||
|
||||
fn main() {
|
||||
let tokens = lexer::tokenize(EASY).unwrap();
|
||||
|
||||
dbg!(&tokens);
|
||||
|
||||
let mut token_stream = TokenStream::from(&tokens);
|
||||
|
||||
while let Ok(statement) = token_stream.parse::<TopLevelStatement>() {
|
||||
dbg!(&statement);
|
||||
}
|
||||
|
||||
dbg!(token_stream.expect(Token::Eof).ok());
|
||||
}
|
176
src/parser.rs
176
src/parser.rs
@ -1,176 +0,0 @@
|
||||
use crate::{lexer::Token, token_stream::TokenStream};
|
||||
|
||||
pub trait Parse
|
||||
where
|
||||
Self: std::marker::Sized,
|
||||
{
|
||||
fn parse(stream: TokenStream) -> Result<Self, ()>;
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Expression {
|
||||
VariableName(String),
|
||||
ContantI32(i32),
|
||||
BinopAdd(Box<Expression>, Box<Expression>),
|
||||
BinopMult(Box<Expression>, Box<Expression>),
|
||||
FunctionCall(Box<FunctionCallExpression>),
|
||||
}
|
||||
|
||||
impl Parse for Expression {
|
||||
fn parse(mut stream: TokenStream) -> Result<Expression, ()> {
|
||||
let lhs = parse_primary_expression(&mut stream)?;
|
||||
parse_binop_rhs(&mut stream, lhs, 0)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_primary_expression(stream: &mut TokenStream) -> Result<Expression, ()> {
|
||||
if let Ok(exp) = stream.parse() {
|
||||
Ok(Expression::FunctionCall(Box::new(exp)))
|
||||
} else if let Some(token) = stream.next() {
|
||||
Ok(match &token {
|
||||
Token::Identifier(v) => Expression::VariableName(v.clone()),
|
||||
Token::DecimalValue(v) => Expression::ContantI32(v.parse().unwrap()),
|
||||
_ => Err(())?, // TODO: Add error raporting!
|
||||
})
|
||||
} else {
|
||||
Err(()) // TODO: Add error raporting!
|
||||
}
|
||||
}
|
||||
|
||||
/// This algorithm seems somewhat like magic to me. I understand it if I read
|
||||
/// carefully, but it is difficult to read every single time.
|
||||
///
|
||||
/// Reference for how the algorithm is formed:
|
||||
/// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl02.html#binary-expression-parsing
|
||||
fn parse_binop_rhs(
|
||||
stream: &mut TokenStream,
|
||||
mut lhs: Expression,
|
||||
expr_prec: i8,
|
||||
) -> Result<Expression, ()> {
|
||||
while let Some(token) = stream.peek() {
|
||||
let curr_token_prec = token.get_token_prec();
|
||||
|
||||
if curr_token_prec < expr_prec {
|
||||
break; // Just return lhs
|
||||
} else {
|
||||
// token has to be an operator
|
||||
stream.next(); // Eat token
|
||||
|
||||
let mut rhs = parse_primary_expression(stream)?;
|
||||
if let Some(next_op) = stream.peek() {
|
||||
let next_prec = next_op.get_token_prec();
|
||||
if curr_token_prec < next_prec {
|
||||
// Operator on the right of rhs has more precedence, turn
|
||||
// rhs into lhs for new binop
|
||||
rhs = parse_binop_rhs(stream, rhs, curr_token_prec + 1)?;
|
||||
}
|
||||
}
|
||||
|
||||
lhs = match &token {
|
||||
Token::Plus => Expression::BinopAdd(Box::new(lhs), Box::new(rhs)),
|
||||
Token::Times => Expression::BinopMult(Box::new(lhs), Box::new(rhs)),
|
||||
_ => Err(())?, // TODO: Add error raporting!
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
Ok(lhs)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FunctionCallExpression(String, Vec<Expression>);
|
||||
|
||||
impl Parse for FunctionCallExpression {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, ()> {
|
||||
if let Some(Token::Identifier(name)) = stream.next() {
|
||||
stream.expect(Token::ParenOpen)?;
|
||||
|
||||
let mut args = Vec::new();
|
||||
|
||||
if let Ok(exp) = stream.parse() {
|
||||
args.push(exp);
|
||||
|
||||
while stream.expect(Token::Comma).is_ok() {
|
||||
args.push(stream.parse()?);
|
||||
}
|
||||
}
|
||||
|
||||
stream.expect(Token::ParenClose)?;
|
||||
|
||||
Ok(FunctionCallExpression(name, args))
|
||||
} else {
|
||||
Err(())? // TODO: Add error raporting!
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum TopLevelStatement {
|
||||
Let(LetStatement),
|
||||
Import(ImportStatement),
|
||||
TLExpression(Expression),
|
||||
}
|
||||
|
||||
impl Parse for TopLevelStatement {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, ()> {
|
||||
Ok(match stream.peek() {
|
||||
Some(Token::LetKeyword) => TopLevelStatement::Let(stream.parse()?),
|
||||
Some(Token::ImportKeyword) => TopLevelStatement::Import(stream.parse()?),
|
||||
_ => {
|
||||
if let Ok(e) = stream.parse() {
|
||||
stream.expect(Token::Semicolon)?;
|
||||
TopLevelStatement::TLExpression(e)
|
||||
} else {
|
||||
Err(())? // TODO: Add error raporting!
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct LetStatement(String, Expression);
|
||||
|
||||
impl Parse for LetStatement {
|
||||
fn parse(mut stream: TokenStream) -> Result<LetStatement, ()> {
|
||||
stream.expect(Token::LetKeyword)?;
|
||||
|
||||
if let Some(Token::Identifier(variable)) = stream.next() {
|
||||
stream.expect(Token::Equals)?;
|
||||
|
||||
let expression = stream.parse()?;
|
||||
stream.expect(Token::Semicolon)?;
|
||||
Ok(LetStatement(variable, expression))
|
||||
} else {
|
||||
Err(()) // TODO: Add error raporting!
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ImportStatement(Vec<String>);
|
||||
|
||||
impl Parse for ImportStatement {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, ()> {
|
||||
stream.expect(Token::ImportKeyword)?;
|
||||
|
||||
let mut import_list = Vec::new();
|
||||
|
||||
if let Some(Token::Identifier(name)) = stream.next() {
|
||||
import_list.push(name);
|
||||
while stream.expect(Token::Colon).is_ok() && stream.expect(Token::Colon).is_ok() {
|
||||
if let Some(Token::Identifier(name)) = stream.next() {
|
||||
import_list.push(name);
|
||||
} else {
|
||||
Err(())? // TODO: Add error raporting!
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Err(())? // TODO: Add error raporting!
|
||||
}
|
||||
|
||||
stream.expect(Token::Semicolon)?;
|
||||
|
||||
Ok(ImportStatement(import_list))
|
||||
}
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
use crate::{
|
||||
lexer::{FullToken, Token},
|
||||
parser::Parse,
|
||||
ast::Parse,
|
||||
lexer::{FullToken, Position, Token},
|
||||
};
|
||||
|
||||
pub struct TokenStream<'a, 'b> {
|
||||
@ -18,16 +18,24 @@ impl<'a, 'b> TokenStream<'a, 'b> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn expect(&mut self, token: Token) -> Result<(), ()> {
|
||||
pub fn expected_err<T: Into<String>>(&mut self, expected: T) -> Result<Error, Error> {
|
||||
Ok(Error::Expected(
|
||||
expected.into(),
|
||||
self.peek().unwrap_or(Token::Eof),
|
||||
self.get_next_position()?,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn expect(&mut self, token: Token) -> Result<(), Error> {
|
||||
if let Some(peeked) = self.peek() {
|
||||
if token == peeked {
|
||||
self.position += 1;
|
||||
Ok(())
|
||||
} else {
|
||||
Err(())
|
||||
Err(self.expected_err(token)?)
|
||||
}
|
||||
} else {
|
||||
Err(())
|
||||
Err(self.expected_err(token)?)
|
||||
}
|
||||
}
|
||||
|
||||
@ -49,7 +57,7 @@ impl<'a, 'b> TokenStream<'a, 'b> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse<T: Parse>(&mut self) -> Result<T, ()> {
|
||||
pub fn parse<T: Parse>(&mut self) -> Result<T, Error> {
|
||||
let mut ref_pos = self.position;
|
||||
|
||||
let position = self.position;
|
||||
@ -67,6 +75,15 @@ impl<'a, 'b> TokenStream<'a, 'b> {
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_next_position(&self) -> Result<Position, Error> {
|
||||
if self.tokens.is_empty() {
|
||||
Err(Error::FileEmpty)
|
||||
} else {
|
||||
let token_idx = self.position.min(self.tokens.len() - 1);
|
||||
Ok(self.tokens[token_idx].position)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for TokenStream<'_, '_> {
|
||||
@ -76,3 +93,11 @@ impl Drop for TokenStream<'_, '_> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum Error {
|
||||
#[error("Expected {} at Ln {}, Col {}, got {:?}", .0, (.2).1, (.2).0, .1)]
|
||||
Expected(String, Token, Position),
|
||||
#[error("Source file contains no tokens")]
|
||||
FileEmpty,
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user