Compare commits

..

No commits in common. "866d78e6d184a37d782ddc9e51cb08c81b61827f" and "44f94388391d8cf0b6c5a630f7b8b90c47939286" have entirely different histories.

11 changed files with 52 additions and 261 deletions

View File

@ -1,59 +0,0 @@
# Simple C-compiler
This is a simple work-in-progress C-compiler (or C-like) written in C++ with the
goal of learning to write C++ better in the future, and to provide reference for
my knowledge about modern C++ programming.
As of writing, a simple Fibonacci sequence program can already be compiled and
executed; see [`test.c`](./test.c) for the example.
As far as compiler-design goes, this project still falls behind my other
project, [Reid-LLVM](https://git.teascade.net/teascade/reid-llvm), which is
significantly more capable and more robust.
## Structure of the program
The program is structured into several different stages, all of which are
orchestrated via main.cpp.
Currently the stages are as follows:
1. Firstly, the program is **tokenized**. This stage could also be called the
lexer, depending on your preference. In this stage, the source code for the
program is transformed into discrete tokens, which are easier to work with
during the parsing phase than regular text. The code for this stage is mostly in
[`src/tokens.cpp`](src/tokens.cpp).
2. **TODO:** Preprocessing stage hasn't yet been developed, but it will go here.
3. Then the program is **parsed**. This is the stage where the tokens from the
previous stage(s) are converted into an Abstract Syntax Tree (AST), which is
a format that is easier for the computer to process. The AST itself lives in
[`src/ast.h`](src/ast.h), and the code for the parsing phase lives in
[`src/parsing.cpp`](src/parsing.cpp).
4. **TODO:** Typechecking phase hasn't yet been developed, but it will go here.
5. Finally, the program is **compiled**, or in other words **code-generated**,
which is why this is called the **codegen** stage. This is where the AST from
the previous stages is taken and LLVM Intermediate Representation is produced
using LLVM-bindings.
## Compiling and running the program
In order to compile the program, you need the following:
- CMake
- C++20 (or newer) capable compiler
- LLVM 21.1.0 or newer
And in order to run the resulting compiler binary, you also need:
- LLVM 21.1.0 or newer (as it is dynamically linked)
- `whereis`-utility in `$PATH`
- `ld`-utility in `$PATH`
Then, to compile the program you run:
```sh
cmake -Bbuild
make -C build
```
and to run the program, simply run `./build/llvm_c_compiler`. This will read a
file called `test.c` from `$PWD`, and produce two files (`test.o` and `test`).
An executable file called `test` is produced as a result, compiled from the
original `test.c`.

View File

@ -9,12 +9,6 @@ namespace AST {
return out.str(); return out.str();
} }
std::string StringLiteralExpression::formatted() {
std::stringstream out{ "" };
out << "\"" << this->m_value << "\"";
return out.str();
}
std::string ValueReferenceExpression::formatted() { std::string ValueReferenceExpression::formatted() {
return this->m_name; return this->m_name;
} }
@ -86,26 +80,17 @@ namespace AST {
for (auto& param : this->m_params) { for (auto& param : this->m_params) {
if (counter++ > 0) if (counter++ > 0)
out << ", "; out << ", ";
out << param.second->formatted(); out << param.second->formatted() << " " << param.first;
if (param.first) {
out << " " << *param.first;
}
}
if (this->m_is_vararg) {
if (counter > 0)
out << ", ";
out << "...";
} }
out << ") -> "; out << ") -> ";
out << this->m_return_ty->formatted(); out << this->m_return_ty->formatted();
if (this->m_statements) {
out << " {\n"; out << " {\n";
for (auto& statement : *this->m_statements) { for (auto& statement : this->m_statements) {
out << " " << statement->formatted() << "\n"; out << " " << statement->formatted() << "\n";
} }
out << "}"; out << "}";
}
return out.str(); return out.str();
} }
} }

View File

@ -41,16 +41,6 @@ namespace AST {
virtual codegen::StackValue codegen(codegen::Builder& builder, codegen::Scope& scope) override; virtual codegen::StackValue codegen(codegen::Builder& builder, codegen::Scope& scope) override;
}; };
class StringLiteralExpression : public Expression {
private:
std::string m_value;
public:
StringLiteralExpression(token::Metadata meta, std::string value) : Expression{ meta }, m_value{ value } {}
virtual ~StringLiteralExpression() override = default;
virtual std::string formatted() override;
virtual codegen::StackValue codegen(codegen::Builder& builder, codegen::Scope& scope) override;
};
class ValueReferenceExpression : public Expression { class ValueReferenceExpression : public Expression {
private: private:
std::string m_name; std::string m_name;
@ -175,22 +165,19 @@ namespace AST {
class Function : public TopLevelStatement { class Function : public TopLevelStatement {
private: private:
std::unique_ptr<types::Type> m_return_ty; std::unique_ptr<types::Type> m_return_ty;
std::vector<std::pair<std::optional<std::string>, std::unique_ptr<types::Type>>> m_params; std::vector<std::pair<std::string, std::unique_ptr<types::Type>>> m_params;
bool m_is_vararg;
std::string m_name; std::string m_name;
std::optional<std::vector<std::unique_ptr<Statement>>> m_statements; std::vector<std::unique_ptr<Statement>> m_statements;
public: public:
Function( Function(
token::Metadata meta, token::Metadata meta,
std::unique_ptr<types::Type> return_ty, std::unique_ptr<types::Type> return_ty,
std::vector<std::pair<std::optional<std::string>, std::unique_ptr<types::Type>>> params, std::vector<std::pair<std::string, std::unique_ptr<types::Type>>> params,
bool is_vararg,
std::string name, std::string name,
std::optional<std::vector<std::unique_ptr<Statement>>> statements) std::vector<std::unique_ptr<Statement>> statements)
: TopLevelStatement{ meta } : TopLevelStatement{ meta }
, m_return_ty{ std::move(return_ty) } , m_return_ty{ std::move(return_ty) }
, m_params{ std::move(params) } , m_params{ std::move(params) }
, m_is_vararg{ is_vararg }
, m_name{ name } , m_name{ name }
, m_statements{ std::move(statements) } { , m_statements{ std::move(statements) } {
} }

View File

@ -26,18 +26,6 @@ namespace AST {
}; };
} }
codegen::StackValue StringLiteralExpression::codegen(codegen::Builder& builder, codegen::Scope&) {
auto stack_type = new types::PointerType{ std::make_unique<types::FundamentalType>(types::FundamentalTypeKind::Char) };
auto str = llvm::StringRef{ this->m_value.c_str() };
return codegen::StackValue{
builder.builder->CreateGlobalString(str),
std::unique_ptr<types::Type>{stack_type},
};
}
codegen::StackValue ValueReferenceExpression::codegen(codegen::Builder& builder, codegen::Scope& scope) { codegen::StackValue ValueReferenceExpression::codegen(codegen::Builder& builder, codegen::Scope& scope) {
auto value = scope.values.find(this->m_name); auto value = scope.values.find(this->m_name);
if (value != scope.values.end()) { if (value != scope.values.end()) {
@ -187,7 +175,7 @@ namespace AST {
param_ty_ptrs.push_back(std::move(param.second)); param_ty_ptrs.push_back(std::move(param.second));
} }
auto fn_ty_ptr = std::shared_ptr<types::Type>{ new types::FunctionType{ ret_ty_ptr, param_ty_ptrs, this->m_is_vararg } }; auto fn_ty_ptr = std::shared_ptr<types::Type>{ new types::FunctionType{ ret_ty_ptr, param_ty_ptrs } };
auto fn_ty = fn_ty_ptr->codegen(builder); auto fn_ty = fn_ty_ptr->codegen(builder);
auto function = llvm::Function::Create( auto function = llvm::Function::Create(
@ -199,7 +187,6 @@ namespace AST {
scope.values[this->m_name] = codegen::StackValue{ function, fn_ty_ptr }; scope.values[this->m_name] = codegen::StackValue{ function, fn_ty_ptr };
if (this->m_statements) {
auto BB = llvm::BasicBlock::Create(*builder.context, "entry", function, nullptr); auto BB = llvm::BasicBlock::Create(*builder.context, "entry", function, nullptr);
builder.block = BB; builder.block = BB;
@ -209,19 +196,16 @@ namespace AST {
for (auto& param : this->m_params) { for (auto& param : this->m_params) {
auto param_ty_ptr = param_ty_ptrs[counter]; auto param_ty_ptr = param_ty_ptrs[counter];
auto arg = function->getArg(counter++); auto arg = function->getArg(counter++);
if (param.first) { arg->setName(param.first);
arg->setName(*param.first); inner_scope.values[param.first] = codegen::StackValue{
inner_scope.values[*param.first] = codegen::StackValue{
arg, arg,
param_ty_ptr, param_ty_ptr,
}; };
} }
}
for (auto& statement : *this->m_statements) { for (auto& statement : this->m_statements) {
statement->codegen(builder, inner_scope); statement->codegen(builder, inner_scope);
} }
}
llvm::verifyFunction(*function); llvm::verifyFunction(*function);
@ -236,10 +220,6 @@ namespace types {
return builder.builder->getInt32Ty(); return builder.builder->getInt32Ty();
case FundamentalTypeKind::Bool: case FundamentalTypeKind::Bool:
return builder.builder->getInt1Ty(); return builder.builder->getInt1Ty();
case FundamentalTypeKind::Char:
return builder.builder->getInt8Ty();
case FundamentalTypeKind::Void:
return builder.builder->getVoidTy();
default: default:
return builder.builder->getVoidTy(); return builder.builder->getVoidTy();
} }
@ -254,7 +234,7 @@ namespace types {
auto ret_ty = this->m_ret_ty->codegen(builder); auto ret_ty = this->m_ret_ty->codegen(builder);
return llvm::FunctionType::get(ret_ty, params, this->m_vararg); return llvm::FunctionType::get(ret_ty, params, false);
} }
llvm::Type* PointerType::codegen(codegen::Builder& builder) { llvm::Type* PointerType::codegen(codegen::Builder& builder) {

View File

@ -20,6 +20,8 @@
#include "tokens.h" #include "tokens.h"
#include "parsing.h" #include "parsing.h"
void llvm_hello_world();
std::string read_file(std::string_view filepath) { std::string read_file(std::string_view filepath) {
std::ifstream input{ std::string{filepath} }; std::ifstream input{ std::string{filepath} };
if (!input) { if (!input) {
@ -159,18 +161,11 @@ std::optional<CompileOutput> compile(std::string_view in_filename) {
}; };
} }
struct ClosePipeDeleter {
// Note 2: Consider adding noexcept.
void operator()(FILE* file) const {
pclose(file);
}
};
/// @brief Executes a given command and returns the output as std::string /// @brief Executes a given command and returns the output as std::string
std::string exec(const char* cmd) { std::string exec(const char* cmd) {
std::array<char, 128> buffer; std::array<char, 128> buffer;
std::string result; std::string result;
std::unique_ptr<FILE, ClosePipeDeleter> pipe(popen(cmd, "r"), ClosePipeDeleter{}); std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(cmd, "r"), pclose);
if (!pipe) { if (!pipe) {
throw std::runtime_error("popen() failed!"); throw std::runtime_error("popen() failed!");
} }

View File

@ -12,37 +12,10 @@ namespace parsing {
try { try {
auto token = inner.expect(token::Type::Ident); auto token = inner.expect(token::Type::Ident);
// TODO eventually make this be potentially more than one word
std::string type_name = token.content;
std::unique_ptr<types::Type> returned{};
if (type_name == "int") {
auto ty = new types::FundamentalType{ types::FundamentalTypeKind::Int };
returned = std::unique_ptr<types::Type>{ ty };
}
else if (type_name == "char") {
auto ty = new types::FundamentalType{ types::FundamentalTypeKind::Char };
returned = std::unique_ptr<types::Type>{ ty };
}
else if (type_name == "void") {
auto ty = new types::FundamentalType{ types::FundamentalTypeKind::Void };
returned = std::unique_ptr<types::Type>{ ty };
}
else {
throw std::runtime_error("Expected type name, got " + type_name);
}
while (inner.peek().type == token::Type::Symbol && inner.peek().content == "*") {
inner.next();
auto ty = new types::PointerType{ std::move(returned) };
returned = std::unique_ptr<types::Type>{ ty };
}
stream.m_position = inner.m_position; stream.m_position = inner.m_position;
return returned; auto ty = new types::FundamentalType{ types::FundamentalTypeKind::Int };
return std::unique_ptr<types::Type>{ ty };
} }
catch (std::runtime_error& error) { catch (std::runtime_error& error) {
return std::string{ error.what() }; return std::string{ error.what() };
@ -59,12 +32,6 @@ namespace parsing {
auto expr = new AST::IntLiteralExpression{ token.metadata, std::stoi(token.content) }; auto expr = new AST::IntLiteralExpression{ token.metadata, std::stoi(token.content) };
return std::unique_ptr<AST::Expression>{ expr }; return std::unique_ptr<AST::Expression>{ expr };
} }
else if (token.type == token::Type::LiteralStr) {
stream.m_position = inner.m_position;
auto expr = new AST::StringLiteralExpression{ token.metadata, token.content };
return std::unique_ptr<AST::Expression>{ expr };
}
else if (token.type == token::Type::Ident) { else if (token.type == token::Type::Ident) {
stream.m_position = inner.m_position; stream.m_position = inner.m_position;
@ -274,60 +241,33 @@ namespace parsing {
auto name_token = inner.expect(token::Type::Ident); auto name_token = inner.expect(token::Type::Ident);
inner.expect(token::Type::Symbol, "("); inner.expect(token::Type::Symbol, "(");
std::vector<std::pair<std::optional<std::string>, std::unique_ptr<types::Type>>> params; std::vector<std::pair<std::string, std::unique_ptr<types::Type>>> params;
bool is_vararg = false;
while (inner.peek().content != ")") { while (inner.peek().content != ")") {
if (params.size() > 0) { if (params.size() > 0) {
inner.expect(token::Type::Symbol, ","); inner.expect(token::Type::Symbol, ",");
} }
if (inner.peek().content == ".") {
inner.next();
inner.expect(token::Type::Symbol, ".");
inner.expect(token::Type::Symbol, ".");
is_vararg = true;
break;
}
auto param_ty = parse_type(inner).unwrap(); auto param_ty = parse_type(inner).unwrap();
std::optional<std::string> param_name{}; auto param_name = inner.expect(token::Type::Ident);
if (inner.peek().type == token::Type::Ident) { params.push_back(std::pair(param_name.content, std::move(param_ty)));
param_name = inner.expect(token::Type::Ident).content;
}
params.push_back(std::pair(param_name, std::move(param_ty)));
} }
inner.expect(token::Type::Symbol, ")"); inner.expect(token::Type::Symbol, ")");
std::optional<std::vector<std::unique_ptr<AST::Statement>>> statements{};
if (inner.peek().content == "{") {
inner.expect(token::Type::Symbol, "{"); inner.expect(token::Type::Symbol, "{");
std::vector<std::unique_ptr<AST::Statement>> statement_list{}; std::vector<std::unique_ptr<AST::Statement>> statements{};
auto statement = parse_statement(inner); auto statement = parse_statement(inner);
while (statement.ok()) { while (statement.ok()) {
statement_list.push_back(statement.unwrap()); statements.push_back(statement.unwrap());
statement = parse_statement(inner); statement = parse_statement(inner);
} }
statements = std::optional{ std::move(statement_list) };
inner.expect(token::Type::Symbol, "}"); inner.expect(token::Type::Symbol, "}");
}
else {
inner.expect(token::Type::Symbol, ";");
}
stream.m_position = inner.m_position; stream.m_position = inner.m_position;
auto fun = new AST::Function{ auto fun = new AST::Function{ before_meta + stream.metadata(), std::move(type), std::move(params), name_token.content, std::move(statements) };
before_meta + stream.metadata(),
std::move(type),
std::move(params),
is_vararg,
name_token.content,
std::move(statements)
};
return std::unique_ptr<AST::TopLevelStatement>{ fun }; return std::unique_ptr<AST::TopLevelStatement>{ fun };
} }
catch (std::runtime_error& error) { catch (std::runtime_error& error) {

View File

@ -23,8 +23,6 @@ namespace token {
return "Symbol"; return "Symbol";
case token::Type::LiteralInt: case token::Type::LiteralInt:
return "LiteralInt"; return "LiteralInt";
case token::Type::LiteralStr:
return "LiteralStr";
case token::Type::ReturnKeyword: case token::Type::ReturnKeyword:
return "Return"; return "Return";
@ -128,9 +126,7 @@ namespace token {
uint32_t line = 0; uint32_t line = 0;
uint32_t line_start = 0; uint32_t line_start = 0;
int text_length = static_cast<int>(text.length()); for (int i = 0; i < static_cast<int>(text.length());) {
for (int i = 0; i < text_length;) {
Position position{ line, i - line_start }; Position position{ line, i - line_start };
Metadata meta{ position, position, filename }; Metadata meta{ position, position, filename };
@ -140,27 +136,14 @@ namespace token {
std::string content{}; std::string content{};
do { do {
content += c; content += c;
if ((i + 1) >= text_length) break;
c = text[++i]; c = text[++i];
} while (std::isdigit(c)); } while (std::isdigit(c));
tokens.push_back(token::Token{ token::Type::LiteralInt, content, meta + content.size() }); tokens.push_back(token::Token{ token::Type::LiteralInt, content, meta + content.size() });
} }
else if (c == '\"') {
std::string content{};
c = text[++i]; // Skip initial "
do {
content += c;
if ((i + 1) >= text_length) break;
c = text[++i];
} while (c != '\"');
i++; // Skip second "
tokens.push_back(token::Token{ token::Type::LiteralStr, content, meta + (content.size() + 2) });
}
else if (std::isalpha(c)) { else if (std::isalpha(c)) {
std::string content{}; std::string content{};
do { do {
content += c; content += c;
if ((i + 1) >= text_length) break;
c = text[++i]; c = text[++i];
} while (std::isalnum(c)); } while (std::isalnum(c));
@ -184,7 +167,6 @@ namespace token {
line_start = i + 1; line_start = i + 1;
} }
content += c; content += c;
if ((i + 1) >= text_length) break;
c = text[++i]; c = text[++i];
} while (iswhitespace(c)); } while (iswhitespace(c));
// tokens.push_back(token::Token{ token::Type::Whitespace, content }); // tokens.push_back(token::Token{ token::Type::Whitespace, content });

View File

@ -11,7 +11,6 @@ namespace token {
Ident, Ident,
Symbol, Symbol,
LiteralInt, LiteralInt,
LiteralStr,
ReturnKeyword, ReturnKeyword,
IfKeyword, IfKeyword,

View File

@ -42,12 +42,6 @@ namespace types {
switch (this->m_ty) { switch (this->m_ty) {
case FundamentalTypeKind::Int: case FundamentalTypeKind::Int:
return "Int"; return "Int";
case FundamentalTypeKind::Bool:
return "Bool";
case FundamentalTypeKind::Char:
return "Char";
case FundamentalTypeKind::Void:
return "Void";
default: default:
return "Unknown"; return "Unknown";
} }
@ -109,12 +103,6 @@ namespace types {
out << param->formatted(); out << param->formatted();
} }
if (this->m_vararg) {
if (counter > 0)
out << ", ";
out << "...";
}
out << ") -> " << this->m_ret_ty->formatted(); out << ") -> " << this->m_ret_ty->formatted();
return out.str(); return out.str();
} }
@ -146,7 +134,7 @@ namespace types {
std::string PointerType::formatted() { std::string PointerType::formatted() {
std::stringstream out{ "" }; std::stringstream out{ "" };
out << this->m_inner->formatted() << "*"; out << this->m_inner << "*";
return out.str(); return out.str();
} }

View File

@ -20,9 +20,7 @@ namespace types {
enum FundamentalTypeKind { enum FundamentalTypeKind {
Int, Int,
Bool, Bool
Char,
Void,
}; };
class Type { class Type {
@ -59,10 +57,9 @@ namespace types {
private: private:
std::shared_ptr<Type> m_ret_ty; std::shared_ptr<Type> m_ret_ty;
std::vector<std::shared_ptr<Type>> m_param_tys; std::vector<std::shared_ptr<Type>> m_param_tys;
bool m_vararg;
public: public:
FunctionType(std::shared_ptr<Type> ret_ty, std::vector<std::shared_ptr<Type>> param_tys, bool vararg) FunctionType(std::shared_ptr<Type> ret_ty, std::vector<std::shared_ptr<Type>> param_tys)
: m_ret_ty{ std::move(ret_ty) }, m_param_tys{ std::move(param_tys) }, m_vararg{ vararg } { : m_ret_ty{ std::move(ret_ty) }, m_param_tys{ std::move(param_tys) } {
} }
virtual ~FunctionType() override = default; virtual ~FunctionType() override = default;
virtual std::string formatted() override; virtual std::string formatted() override;

5
test.c
View File

@ -1,5 +1,3 @@
void printf(char*, ...);
int fibonacci(int n) { int fibonacci(int n) {
if (n < 2) if (n < 2)
return 1; return 1;
@ -7,6 +5,5 @@ int fibonacci(int n) {
} }
int main() { int main() {
printf("10th fibonacci number is %d!", fibonacci(10)); return fibonacci(10);
return 0;
} }