Compare commits
10 Commits
44f9438839
...
866d78e6d1
| Author | SHA1 | Date | |
|---|---|---|---|
| 866d78e6d1 | |||
| 8646d5c0d0 | |||
| 3f61e3749e | |||
| 5dc6e8ca16 | |||
| 96e4d6232f | |||
| bd76e8676f | |||
| ac7731446e | |||
| 6855360a97 | |||
| 7cf752f67b | |||
| 07c0059c5c |
59
README.md
Normal file
59
README.md
Normal file
@ -0,0 +1,59 @@
|
||||
# Simple C-compiler
|
||||
|
||||
This is a simple work-in-progress C-compiler (or C-like) written in C++ with the
|
||||
goal of learning to write C++ better in the future, and to provide reference for
|
||||
my knowledge about modern C++ programming.
|
||||
|
||||
As of writing, a simple Fibonacci sequence program can already be
|
||||
compiled and executed, and can be viewed via [`test.c`](./test.c).
|
||||
|
||||
As far as compiler-design goes, this project still falls behind my other
|
||||
project, [Reid-LLVM](https://git.teascade.net/teascade/reid-llvm), which is
|
||||
significantly more capable and more robust.
|
||||
|
||||
## Structure of the program
|
||||
|
||||
The program is structured into several different stages, all of which are
|
||||
orchestrated via main.cpp.
|
||||
|
||||
Currently the stages are as follows:
|
||||
|
||||
1. Firstly, the program is **tokenized**. This stage could also be called the
|
||||
lexer, depending on your preference. In this stage, the source code for the
|
||||
program is transformed into discrete tokens which can then be used during the
|
||||
parsing phase more easily than regular text. The code for this stage is mostly in
|
||||
[`src/tokens.cpp`](src/tokens.cpp).
|
||||
2. **TODO:** Preprocessing stage hasn't yet been developed, but it will go here.
|
||||
3. Then the program is **parsed**. This is the stage where the tokens from the
|
||||
previous stage(s) are converted into an Abstract Syntax Tree (AST), which is
|
||||
a format that is easier for the computer to process. The AST itself lives in
|
||||
[`src/ast.h`](src/ast.h), and the code for the parsing phase lives in
|
||||
[`src/parsing.cpp`](src/parsing.cpp).
|
||||
4. **TODO:** Typechecking phase hasn't yet been developed, but it will go here.
|
||||
5. Finally the program is **compiled**, or in other words **code-generated**,
|
||||
hence why this is the **codegen** stage. This is where the AST from the
|
||||
previous stages is taken and LLVM Intermediate Representation is produced
|
||||
using LLVM-bindings.
|
||||
|
||||
## Compiling and running the program
|
||||
|
||||
In order to compile the program, you need the following:
|
||||
- CMake
|
||||
- C++20 (or newer) capable compiler
|
||||
- LLVM 21.1.0 or newer
|
||||
|
||||
And in order to run the compiled program, you also need:
|
||||
- LLVM 21.1.0 or newer (as it is dynamically linked)
|
||||
- `whereis`-utility in `$PATH`
|
||||
- `ld`-utility in `$PATH`
|
||||
|
||||
Then, to compile the program you run:
|
||||
```sh
|
||||
cmake -Bbuild
|
||||
make -C build
|
||||
```
|
||||
|
||||
and to run the program, simply run `./build/llvm_c_compiler`. This will read a
|
||||
file called `test.c` from `$PWD`, and produce two files (`test.o` and `test`).
|
||||
An executable file called `test` is produced as a result, compiled from the
|
||||
original `test.c`.
|
||||
27
src/ast.cpp
27
src/ast.cpp
@ -9,6 +9,12 @@ namespace AST {
|
||||
return out.str();
|
||||
}
|
||||
|
||||
/// @brief Renders the string literal for debug/pretty-print output.
/// @return The stored literal text wrapped in a pair of double quotes.
///         No escaping is applied to the text itself.
std::string StringLiteralExpression::formatted() {
    std::string quoted{ "\"" };
    quoted += this->m_value;
    quoted += "\"";
    return quoted;
}
|
||||
|
||||
std::string ValueReferenceExpression::formatted() {
|
||||
return this->m_name;
|
||||
}
|
||||
@ -80,17 +86,26 @@ namespace AST {
|
||||
for (auto& param : this->m_params) {
|
||||
if (counter++ > 0)
|
||||
out << ", ";
|
||||
out << param.second->formatted() << " " << param.first;
|
||||
out << param.second->formatted();
|
||||
if (param.first) {
|
||||
out << " " << *param.first;
|
||||
}
|
||||
}
|
||||
if (this->m_is_vararg) {
|
||||
if (counter > 0)
|
||||
out << ", ";
|
||||
out << "...";
|
||||
}
|
||||
|
||||
out << ") -> ";
|
||||
out << this->m_return_ty->formatted();
|
||||
out << " {\n";
|
||||
for (auto& statement : this->m_statements) {
|
||||
out << " " << statement->formatted() << "\n";
|
||||
if (this->m_statements) {
|
||||
out << " {\n";
|
||||
for (auto& statement : *this->m_statements) {
|
||||
out << " " << statement->formatted() << "\n";
|
||||
}
|
||||
out << "}";
|
||||
}
|
||||
|
||||
out << "}";
|
||||
return out.str();
|
||||
}
|
||||
}
|
||||
21
src/ast.h
21
src/ast.h
@ -41,6 +41,16 @@ namespace AST {
|
||||
virtual codegen::StackValue codegen(codegen::Builder& builder, codegen::Scope& scope) override;
|
||||
};
|
||||
|
||||
class StringLiteralExpression : public Expression {
|
||||
private:
|
||||
std::string m_value;
|
||||
public:
|
||||
StringLiteralExpression(token::Metadata meta, std::string value) : Expression{ meta }, m_value{ value } {}
|
||||
virtual ~StringLiteralExpression() override = default;
|
||||
virtual std::string formatted() override;
|
||||
virtual codegen::StackValue codegen(codegen::Builder& builder, codegen::Scope& scope) override;
|
||||
};
|
||||
|
||||
class ValueReferenceExpression : public Expression {
|
||||
private:
|
||||
std::string m_name;
|
||||
@ -165,19 +175,22 @@ namespace AST {
|
||||
class Function : public TopLevelStatement {
|
||||
private:
|
||||
std::unique_ptr<types::Type> m_return_ty;
|
||||
std::vector<std::pair<std::string, std::unique_ptr<types::Type>>> m_params;
|
||||
std::vector<std::pair<std::optional<std::string>, std::unique_ptr<types::Type>>> m_params;
|
||||
bool m_is_vararg;
|
||||
std::string m_name;
|
||||
std::vector<std::unique_ptr<Statement>> m_statements;
|
||||
std::optional<std::vector<std::unique_ptr<Statement>>> m_statements;
|
||||
public:
|
||||
Function(
|
||||
token::Metadata meta,
|
||||
std::unique_ptr<types::Type> return_ty,
|
||||
std::vector<std::pair<std::string, std::unique_ptr<types::Type>>> params,
|
||||
std::vector<std::pair<std::optional<std::string>, std::unique_ptr<types::Type>>> params,
|
||||
bool is_vararg,
|
||||
std::string name,
|
||||
std::vector<std::unique_ptr<Statement>> statements)
|
||||
std::optional<std::vector<std::unique_ptr<Statement>>> statements)
|
||||
: TopLevelStatement{ meta }
|
||||
, m_return_ty{ std::move(return_ty) }
|
||||
, m_params{ std::move(params) }
|
||||
, m_is_vararg{ is_vararg }
|
||||
, m_name{ name }
|
||||
, m_statements{ std::move(statements) } {
|
||||
}
|
||||
|
||||
@ -22,7 +22,19 @@ namespace AST {
|
||||
|
||||
return codegen::StackValue{
|
||||
llvm::ConstantInt::get(ty, this->m_value),
|
||||
std::unique_ptr<types::Type>{stack_type}
|
||||
std::unique_ptr<types::Type>{stack_type}
|
||||
};
|
||||
}
|
||||
|
||||
/// @brief Generates code for a string literal.
///
/// Passes the literal's text to the builder's CreateGlobalString and pairs
/// the resulting value with a pointer-to-Char type. The scope argument is
/// not used.
///
/// @param builder Provides the IR builder used to emit the string.
/// @return StackValue holding the emitted string value and its type.
codegen::StackValue StringLiteralExpression::codegen(codegen::Builder& builder, codegen::Scope&) {
    // Type reported for the literal: pointer to Char.
    std::unique_ptr<types::Type> literal_ty{
        new types::PointerType{
            std::make_unique<types::FundamentalType>(types::FundamentalTypeKind::Char)
        }
    };

    // Built from c_str(), so the text ends at the first NUL byte.
    const llvm::StringRef text{ this->m_value.c_str() };
    auto* emitted = builder.builder->CreateGlobalString(text);

    return codegen::StackValue{
        emitted,
        std::move(literal_ty),
    };
}
|
||||
|
||||
@ -175,7 +187,7 @@ namespace AST {
|
||||
param_ty_ptrs.push_back(std::move(param.second));
|
||||
}
|
||||
|
||||
auto fn_ty_ptr = std::shared_ptr<types::Type>{ new types::FunctionType{ ret_ty_ptr, param_ty_ptrs } };
|
||||
auto fn_ty_ptr = std::shared_ptr<types::Type>{ new types::FunctionType{ ret_ty_ptr, param_ty_ptrs, this->m_is_vararg } };
|
||||
|
||||
auto fn_ty = fn_ty_ptr->codegen(builder);
|
||||
auto function = llvm::Function::Create(
|
||||
@ -187,24 +199,28 @@ namespace AST {
|
||||
|
||||
scope.values[this->m_name] = codegen::StackValue{ function, fn_ty_ptr };
|
||||
|
||||
auto BB = llvm::BasicBlock::Create(*builder.context, "entry", function, nullptr);
|
||||
builder.block = BB;
|
||||
if (this->m_statements) {
|
||||
auto BB = llvm::BasicBlock::Create(*builder.context, "entry", function, nullptr);
|
||||
builder.block = BB;
|
||||
|
||||
codegen::Scope inner_scope{ scope };
|
||||
codegen::Scope inner_scope{ scope };
|
||||
|
||||
int counter = 0;
|
||||
for (auto& param : this->m_params) {
|
||||
auto param_ty_ptr = param_ty_ptrs[counter];
|
||||
auto arg = function->getArg(counter++);
|
||||
arg->setName(param.first);
|
||||
inner_scope.values[param.first] = codegen::StackValue{
|
||||
arg,
|
||||
param_ty_ptr,
|
||||
};
|
||||
}
|
||||
int counter = 0;
|
||||
for (auto& param : this->m_params) {
|
||||
auto param_ty_ptr = param_ty_ptrs[counter];
|
||||
auto arg = function->getArg(counter++);
|
||||
if (param.first) {
|
||||
arg->setName(*param.first);
|
||||
inner_scope.values[*param.first] = codegen::StackValue{
|
||||
arg,
|
||||
param_ty_ptr,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& statement : this->m_statements) {
|
||||
statement->codegen(builder, inner_scope);
|
||||
for (auto& statement : *this->m_statements) {
|
||||
statement->codegen(builder, inner_scope);
|
||||
}
|
||||
}
|
||||
|
||||
llvm::verifyFunction(*function);
|
||||
@ -220,6 +236,10 @@ namespace types {
|
||||
return builder.builder->getInt32Ty();
|
||||
case FundamentalTypeKind::Bool:
|
||||
return builder.builder->getInt1Ty();
|
||||
case FundamentalTypeKind::Char:
|
||||
return builder.builder->getInt8Ty();
|
||||
case FundamentalTypeKind::Void:
|
||||
return builder.builder->getVoidTy();
|
||||
default:
|
||||
return builder.builder->getVoidTy();
|
||||
}
|
||||
@ -234,7 +254,7 @@ namespace types {
|
||||
|
||||
auto ret_ty = this->m_ret_ty->codegen(builder);
|
||||
|
||||
return llvm::FunctionType::get(ret_ty, params, false);
|
||||
return llvm::FunctionType::get(ret_ty, params, this->m_vararg);
|
||||
}
|
||||
|
||||
llvm::Type* PointerType::codegen(codegen::Builder& builder) {
|
||||
|
||||
11
src/main.cpp
11
src/main.cpp
@ -20,8 +20,6 @@
|
||||
#include "tokens.h"
|
||||
#include "parsing.h"
|
||||
|
||||
void llvm_hello_world();
|
||||
|
||||
std::string read_file(std::string_view filepath) {
|
||||
std::ifstream input{ std::string{filepath} };
|
||||
if (!input) {
|
||||
@ -161,11 +159,18 @@ std::optional<CompileOutput> compile(std::string_view in_filename) {
|
||||
};
|
||||
}
|
||||
|
||||
/// @brief Deleter for std::unique_ptr<FILE, ...> handles obtained from popen().
///
/// Closes the stream with pclose(). The call operator is noexcept: pclose is
/// a C function that reports failure through its return value, which is
/// intentionally ignored here because a deleter has nowhere to report it.
struct ClosePipeDeleter {
    /// @param file Stream previously returned by popen(); std::unique_ptr
    ///             only invokes the deleter for non-null pointers.
    void operator()(FILE* file) const noexcept {
        pclose(file);
    }
};
|
||||
|
||||
/// @brief Executes a given command and returns the output as std::string
|
||||
std::string exec(const char* cmd) {
|
||||
std::array<char, 128> buffer;
|
||||
std::string result;
|
||||
std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(cmd, "r"), pclose);
|
||||
std::unique_ptr<FILE, ClosePipeDeleter> pipe(popen(cmd, "r"), ClosePipeDeleter{});
|
||||
if (!pipe) {
|
||||
throw std::runtime_error("popen() failed!");
|
||||
}
|
||||
|
||||
@ -12,10 +12,37 @@ namespace parsing {
|
||||
try {
|
||||
auto token = inner.expect(token::Type::Ident);
|
||||
|
||||
// TODO eventually make this be potentially more than one word
|
||||
std::string type_name = token.content;
|
||||
|
||||
std::unique_ptr<types::Type> returned{};
|
||||
|
||||
if (type_name == "int") {
|
||||
auto ty = new types::FundamentalType{ types::FundamentalTypeKind::Int };
|
||||
returned = std::unique_ptr<types::Type>{ ty };
|
||||
}
|
||||
else if (type_name == "char") {
|
||||
auto ty = new types::FundamentalType{ types::FundamentalTypeKind::Char };
|
||||
returned = std::unique_ptr<types::Type>{ ty };
|
||||
}
|
||||
else if (type_name == "void") {
|
||||
auto ty = new types::FundamentalType{ types::FundamentalTypeKind::Void };
|
||||
returned = std::unique_ptr<types::Type>{ ty };
|
||||
}
|
||||
else {
|
||||
throw std::runtime_error("Expected type name, got " + type_name);
|
||||
}
|
||||
|
||||
while (inner.peek().type == token::Type::Symbol && inner.peek().content == "*") {
|
||||
inner.next();
|
||||
auto ty = new types::PointerType{ std::move(returned) };
|
||||
returned = std::unique_ptr<types::Type>{ ty };
|
||||
}
|
||||
|
||||
|
||||
stream.m_position = inner.m_position;
|
||||
|
||||
auto ty = new types::FundamentalType{ types::FundamentalTypeKind::Int };
|
||||
return std::unique_ptr<types::Type>{ ty };
|
||||
return returned;
|
||||
}
|
||||
catch (std::runtime_error& error) {
|
||||
return std::string{ error.what() };
|
||||
@ -32,6 +59,12 @@ namespace parsing {
|
||||
auto expr = new AST::IntLiteralExpression{ token.metadata, std::stoi(token.content) };
|
||||
return std::unique_ptr<AST::Expression>{ expr };
|
||||
}
|
||||
else if (token.type == token::Type::LiteralStr) {
|
||||
stream.m_position = inner.m_position;
|
||||
|
||||
auto expr = new AST::StringLiteralExpression{ token.metadata, token.content };
|
||||
return std::unique_ptr<AST::Expression>{ expr };
|
||||
}
|
||||
else if (token.type == token::Type::Ident) {
|
||||
stream.m_position = inner.m_position;
|
||||
|
||||
@ -241,33 +274,60 @@ namespace parsing {
|
||||
auto name_token = inner.expect(token::Type::Ident);
|
||||
inner.expect(token::Type::Symbol, "(");
|
||||
|
||||
std::vector<std::pair<std::string, std::unique_ptr<types::Type>>> params;
|
||||
std::vector<std::pair<std::optional<std::string>, std::unique_ptr<types::Type>>> params;
|
||||
bool is_vararg = false;
|
||||
while (inner.peek().content != ")") {
|
||||
if (params.size() > 0) {
|
||||
inner.expect(token::Type::Symbol, ",");
|
||||
}
|
||||
|
||||
if (inner.peek().content == ".") {
|
||||
inner.next();
|
||||
inner.expect(token::Type::Symbol, ".");
|
||||
inner.expect(token::Type::Symbol, ".");
|
||||
is_vararg = true;
|
||||
break;
|
||||
}
|
||||
|
||||
auto param_ty = parse_type(inner).unwrap();
|
||||
auto param_name = inner.expect(token::Type::Ident);
|
||||
params.push_back(std::pair(param_name.content, std::move(param_ty)));
|
||||
std::optional<std::string> param_name{};
|
||||
if (inner.peek().type == token::Type::Ident) {
|
||||
param_name = inner.expect(token::Type::Ident).content;
|
||||
}
|
||||
params.push_back(std::pair(param_name, std::move(param_ty)));
|
||||
}
|
||||
|
||||
inner.expect(token::Type::Symbol, ")");
|
||||
inner.expect(token::Type::Symbol, "{");
|
||||
|
||||
std::vector<std::unique_ptr<AST::Statement>> statements{};
|
||||
std::optional<std::vector<std::unique_ptr<AST::Statement>>> statements{};
|
||||
if (inner.peek().content == "{") {
|
||||
inner.expect(token::Type::Symbol, "{");
|
||||
|
||||
auto statement = parse_statement(inner);
|
||||
while (statement.ok()) {
|
||||
statements.push_back(statement.unwrap());
|
||||
statement = parse_statement(inner);
|
||||
std::vector<std::unique_ptr<AST::Statement>> statement_list{};
|
||||
|
||||
auto statement = parse_statement(inner);
|
||||
while (statement.ok()) {
|
||||
statement_list.push_back(statement.unwrap());
|
||||
statement = parse_statement(inner);
|
||||
}
|
||||
|
||||
statements = std::optional{ std::move(statement_list) };
|
||||
inner.expect(token::Type::Symbol, "}");
|
||||
}
|
||||
else {
|
||||
inner.expect(token::Type::Symbol, ";");
|
||||
}
|
||||
|
||||
|
||||
inner.expect(token::Type::Symbol, "}");
|
||||
|
||||
stream.m_position = inner.m_position;
|
||||
|
||||
auto fun = new AST::Function{ before_meta + stream.metadata(), std::move(type), std::move(params), name_token.content, std::move(statements) };
|
||||
auto fun = new AST::Function{
|
||||
before_meta + stream.metadata(),
|
||||
std::move(type),
|
||||
std::move(params),
|
||||
is_vararg,
|
||||
name_token.content,
|
||||
std::move(statements)
|
||||
};
|
||||
return std::unique_ptr<AST::TopLevelStatement>{ fun };
|
||||
}
|
||||
catch (std::runtime_error& error) {
|
||||
|
||||
@ -23,6 +23,8 @@ namespace token {
|
||||
return "Symbol";
|
||||
case token::Type::LiteralInt:
|
||||
return "LiteralInt";
|
||||
case token::Type::LiteralStr:
|
||||
return "LiteralStr";
|
||||
|
||||
case token::Type::ReturnKeyword:
|
||||
return "Return";
|
||||
@ -126,7 +128,9 @@ namespace token {
|
||||
uint32_t line = 0;
|
||||
uint32_t line_start = 0;
|
||||
|
||||
for (int i = 0; i < static_cast<int>(text.length());) {
|
||||
int text_length = static_cast<int>(text.length());
|
||||
|
||||
for (int i = 0; i < text_length;) {
|
||||
Position position{ line, i - line_start };
|
||||
Metadata meta{ position, position, filename };
|
||||
|
||||
@ -136,14 +140,27 @@ namespace token {
|
||||
std::string content{};
|
||||
do {
|
||||
content += c;
|
||||
if ((i + 1) >= text_length) break;
|
||||
c = text[++i];
|
||||
} while (std::isdigit(c));
|
||||
tokens.push_back(token::Token{ token::Type::LiteralInt, content, meta + content.size() });
|
||||
}
|
||||
else if (c == '\"') {
|
||||
std::string content{};
|
||||
c = text[++i]; // Skip initial "
|
||||
do {
|
||||
content += c;
|
||||
if ((i + 1) >= text_length) break;
|
||||
c = text[++i];
|
||||
} while (c != '\"');
|
||||
i++; // Skip second "
|
||||
tokens.push_back(token::Token{ token::Type::LiteralStr, content, meta + (content.size() + 2) });
|
||||
}
|
||||
else if (std::isalpha(c)) {
|
||||
std::string content{};
|
||||
do {
|
||||
content += c;
|
||||
if ((i + 1) >= text_length) break;
|
||||
c = text[++i];
|
||||
} while (std::isalnum(c));
|
||||
|
||||
@ -167,6 +184,7 @@ namespace token {
|
||||
line_start = i + 1;
|
||||
}
|
||||
content += c;
|
||||
if ((i + 1) >= text_length) break;
|
||||
c = text[++i];
|
||||
} while (iswhitespace(c));
|
||||
// tokens.push_back(token::Token{ token::Type::Whitespace, content });
|
||||
|
||||
@ -11,6 +11,7 @@ namespace token {
|
||||
Ident,
|
||||
Symbol,
|
||||
LiteralInt,
|
||||
LiteralStr,
|
||||
|
||||
ReturnKeyword,
|
||||
IfKeyword,
|
||||
|
||||
@ -42,6 +42,12 @@ namespace types {
|
||||
switch (this->m_ty) {
|
||||
case FundamentalTypeKind::Int:
|
||||
return "Int";
|
||||
case FundamentalTypeKind::Bool:
|
||||
return "Bool";
|
||||
case FundamentalTypeKind::Char:
|
||||
return "Char";
|
||||
case FundamentalTypeKind::Void:
|
||||
return "Void";
|
||||
default:
|
||||
return "Unknown";
|
||||
}
|
||||
@ -103,6 +109,12 @@ namespace types {
|
||||
out << param->formatted();
|
||||
}
|
||||
|
||||
if (this->m_vararg) {
|
||||
if (counter > 0)
|
||||
out << ", ";
|
||||
out << "...";
|
||||
}
|
||||
|
||||
out << ") -> " << this->m_ret_ty->formatted();
|
||||
return out.str();
|
||||
}
|
||||
@ -134,7 +146,7 @@ namespace types {
|
||||
|
||||
/// @brief Renders the pointer type for debug/pretty-print output.
/// @return The formatted pointee type followed by `*`.
std::string PointerType::formatted() {
    std::stringstream out{ "" };
    // Format the pointee through its own formatted(); streaming m_inner
    // itself would insert the pointer object rather than the type's name.
    out << this->m_inner->formatted() << "*";
    return out.str();
}
|
||||
|
||||
|
||||
@ -20,7 +20,9 @@ namespace types {
|
||||
|
||||
/// @brief Kinds of built-in (fundamental) types known to the compiler.
///
/// Kept as an unscoped enum so existing `FundamentalTypeKind::X` uses and
/// switch statements continue to work unchanged.
enum FundamentalTypeKind {
    Int,
    Bool,
    Char,
    Void,
};
|
||||
|
||||
class Type {
|
||||
@ -57,9 +59,10 @@ namespace types {
|
||||
private:
|
||||
std::shared_ptr<Type> m_ret_ty;
|
||||
std::vector<std::shared_ptr<Type>> m_param_tys;
|
||||
bool m_vararg;
|
||||
public:
|
||||
FunctionType(std::shared_ptr<Type> ret_ty, std::vector<std::shared_ptr<Type>> param_tys)
|
||||
: m_ret_ty{ std::move(ret_ty) }, m_param_tys{ std::move(param_tys) } {
|
||||
FunctionType(std::shared_ptr<Type> ret_ty, std::vector<std::shared_ptr<Type>> param_tys, bool vararg)
|
||||
: m_ret_ty{ std::move(ret_ty) }, m_param_tys{ std::move(param_tys) }, m_vararg{ vararg } {
|
||||
}
|
||||
virtual ~FunctionType() override = default;
|
||||
virtual std::string formatted() override;
|
||||
|
||||
Loading…
Reference in New Issue
Block a user