diff --git a/README.md b/README.md new file mode 100644 index 0000000..0e91a2e --- /dev/null +++ b/README.md @@ -0,0 +1,59 @@ +# Simple C-compiler + +This is a simple work-in-progress C-compiler (or C-like) written in C++ with the +goal of learning to write C++ better in the future, and to provide reference for +my knowledge about modern C++ programming. + +As of writing, a simple fibonacci sequence program can already be +compiled and executed, and can be viewed via [`test.c`](./test.c). + +As far as compiler-design goes, this project still falls behind my other +project, [Reid-LLVM](https://git.teascade.net/teascade/reid-llvm), which is +significantly more capable and more robust. + +## Structure of the program + +The program is structured into several different stages, all of which are +orchestrated via main.cpp. + +Currently the stages are as follows: + +1. Firstly, the program is **tokenized**. This stage could also be called the + lexer, depending on your preference. In this stage, the source code for the + program is transformed into discrete tokens which can then be used during the + parsing phase easier than regular text. The code for this stage is mostly in + [`src/tokens.cpp`](src/tokens.cpp). +2. **TODO:** Preprocessing stage hasn't yet been developed, but it will go here. +3. Then the program is **parsed**. This is the stage where the tokens from the + previous stage(s) are converted into an Abstract Syntax Tree (AST), which is + a format that is easier for the computer to process. The AST itself lives in + [`src/ast.h`](src/ast.h), and the code for the parsing phase lives in + [`src/parsing.cpp`](src/parsing.cpp). +4. **TODO:** Typechecking phase hasn't yet been developed, but it will go here. +5. Finally the program is **compiled**, or in other words **code-generated**, + hence why this is the **codegen** stage. This is where the AST from the + previous stages is taken and LLVM Intermediate Representation is produced + using LLVM-bindings. 
+ +## Compiling and running the program + +In order to compile the program, you need the following: +- CMake +- C++20 (or newer) capable compiler +- LLVM 21.1.0 or newer + +And in order to execute the program which is compiled you also need: +- LLVM 21.1.0 or newer (as it is dynamically linked) +- `whereis`-utility in `$PATH` +- `ld`-utility in `$PATH` + +Then, to compile the program you run: +```sh +cmake -Bbuild +make -C build +``` + +and to run the program, run simply `./build/llvm_c_compiler`. This will read a +file called `test.c` from `$PWD`, and produce two files (`test.o` and `test`). +An executable file called `test` is produced as a result, compiled from the +original `test.c`. \ No newline at end of file diff --git a/src/codegen.cpp b/src/codegen.cpp index 15fea41..844f7e8 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -187,7 +187,7 @@ namespace AST { param_ty_ptrs.push_back(std::move(param.second)); } - auto fn_ty_ptr = std::shared_ptr{ new types::FunctionType{ ret_ty_ptr, param_ty_ptrs } }; + auto fn_ty_ptr = std::shared_ptr{ new types::FunctionType{ ret_ty_ptr, param_ty_ptrs, this->m_is_vararg } }; auto fn_ty = fn_ty_ptr->codegen(builder); auto function = llvm::Function::Create( @@ -254,7 +254,7 @@ namespace types { auto ret_ty = this->m_ret_ty->codegen(builder); - return llvm::FunctionType::get(ret_ty, params, false); + return llvm::FunctionType::get(ret_ty, params, this->m_vararg); } llvm::Type* PointerType::codegen(codegen::Builder& builder) { diff --git a/src/main.cpp b/src/main.cpp index 185272e..fe5f909 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -159,11 +159,18 @@ std::optional compile(std::string_view in_filename) { }; } +struct ClosePipeDeleter { + // Deleter functor: closes the given FILE* via pclose(); exec() hands it to std::unique_ptr to manage the popen() pipe. Note 2: Consider adding noexcept. 
+ void operator()(FILE* file) const { + pclose(file); + } +}; + /// @brief Executes a given command and returns the output as std::string std::string exec(const char* cmd) { std::array buffer; std::string result; - std::unique_ptr pipe(popen(cmd, "r"), pclose); + std::unique_ptr pipe(popen(cmd, "r"), ClosePipeDeleter{}); if (!pipe) { throw std::runtime_error("popen() failed!"); } diff --git a/src/parsing.cpp b/src/parsing.cpp index 06de893..2f4d752 100644 --- a/src/parsing.cpp +++ b/src/parsing.cpp @@ -42,7 +42,7 @@ namespace parsing { stream.m_position = inner.m_position; - return std::move(returned); + return returned; } catch (std::runtime_error& error) { return std::string{ error.what() }; diff --git a/src/types.cpp b/src/types.cpp index 3c7928c..715d532 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -109,6 +109,12 @@ namespace types { out << param->formatted(); } + if (this->m_vararg) { + if (counter > 0) + out << ", "; + out << "..."; + } + out << ") -> " << this->m_ret_ty->formatted(); return out.str(); } diff --git a/src/types.h b/src/types.h index b9c40c1..ff39f29 100644 --- a/src/types.h +++ b/src/types.h @@ -59,9 +59,10 @@ namespace types { private: std::shared_ptr m_ret_ty; std::vector> m_param_tys; + bool m_vararg; public: - FunctionType(std::shared_ptr ret_ty, std::vector> param_tys) - : m_ret_ty{ std::move(ret_ty) }, m_param_tys{ std::move(param_tys) } { + FunctionType(std::shared_ptr ret_ty, std::vector> param_tys, bool vararg) + : m_ret_ty{ std::move(ret_ty) }, m_param_tys{ std::move(param_tys) }, m_vararg{ vararg } { } virtual ~FunctionType() override = default; virtual std::string formatted() override;