c-compiler/src/main.cpp
2026-04-13 21:08:57 +03:00

253 lines
7.7 KiB
C++

#include <llvm/ADT/StringRef.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Verifier.h>
#include <llvm/MC/TargetRegistry.h>
#include <llvm/Support/FileSystem.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/raw_ostream.h>
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <llvm/TargetParser/Host.h>
#include <array>
#include <cctype>
#include <cstdio>
#include <exception>
#include <fstream>
#include <iostream>
#include <memory>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "tokens.h"
#include "parsing.h"
#include "errors.h"
/// @brief Reads a text file into a string.
///
/// The file is read line by line with std::getline and the lines are re-joined
/// with '\n', so the terminator of the last line is not part of the result.
///
/// @param filepath path of the file to read
/// @return the file contents; an empty string (after a message on stderr) when
///         the file cannot be opened
std::string read_file(std::string_view filepath) {
    std::ifstream input{ std::string{filepath} };
    if (!input) {
        std::cerr << "Failed to read " << filepath << std::endl;
        return {};
    }

    std::string out{};
    std::string currLine;
    // Track the first line explicitly rather than testing whether `out` is
    // empty: blank lines at the top of the file must still contribute their
    // '\n', otherwise every later line number is shifted.
    bool first_line = true;
    while (std::getline(input, currLine)) {
        if (!first_line) {
            out += '\n';
        }
        out += currLine;  // in-place append; avoids recopying the buffer per line
        first_line = false;
    }
    return out;
}
/// @brief Writes @p content to @p filepath byte-for-byte, replacing any existing file.
///
/// The stream is opened in binary mode and nothing is appended, so the file
/// holds exactly the bytes of @p content (embedded NUL bytes included).
///
/// @param filepath destination path
/// @param content  bytes to write
/// @return 0 on success, 1 (after a message on stderr) when the file cannot be
///         opened or the write fails
int write_file(std::string_view filepath, const std::string& content) {
    std::ofstream output{ std::string{filepath}, std::ios::binary | std::ios::trunc };
    if (!output) {
        std::cerr << "Failed to open " << filepath << " for writing" << std::endl;
        return 1;
    }

    output.write(content.data(), static_cast<std::streamsize>(content.size()));
    output.flush();
    // Check after the flush so buffered write errors (e.g. disk full) are seen.
    if (!output) {
        std::cerr << "Failed to write " << filepath << std::endl;
        return 1;
    }
    return 0;
}
/// @brief The two artifacts produced by one successful compilation.
struct CompileOutput {
    /// The module printed as LLVM IR text.
    std::string llvm_ir_string{};
    /// The bytes of the emitted object file.
    std::string obj_string{};
};
/// @brief Compiles the contents from the given filename and returns output as strings
///
/// Pipeline: read the source, tokenize, parse top-level statements, typecheck,
/// generate LLVM IR, then emit both the textual IR and an object file for the
/// default target triple. The source text, the tokens and the formatted
/// statements are echoed to stdout along the way.
///
/// @param in_filename path of the source file; also used as the LLVM module name
/// @return the IR text and object bytes, or an empty optional after a parse,
///         typecheck, codegen or target-setup failure has been reported
std::optional<CompileOutput> compile(std::string_view in_filename) {
    // Read contents of the file, print contents
    std::string source{ read_file(in_filename) };
    std::cout << source << std::endl;

    // Tokenize contents, print tokens
    auto tokens = token::tokenize(source, std::string{ in_filename });
    for (auto&& tok : tokens) {
        std::cout << tok << std::endl;
    }

    // Parse tokens
    auto stream = token::TokenStream{ tokens };
    std::vector<std::unique_ptr<AST::TopLevelStatement>> statements;
    auto statement = parsing::parse_top_level_statement(stream);
    while (statement.ok()) {
        statements.push_back(statement.unwrap());
        statement = parsing::parse_top_level_statement(stream);
    }
    // The loop ends on the first statement that fails to parse; that failure
    // is only reported when the stream is not at end of input.
    if (stream.peek().type != token::Type::Eof) {
        std::cerr << statement.unwrap_err() << std::endl;
        return {};
    }
    stream.expect(token::Type::Eof);

    // Prepare compiler
    auto llvm_context = std::make_unique<llvm::LLVMContext>();
    auto llvm_module = std::make_unique<llvm::Module>(std::string{ in_filename }, *llvm_context);
    auto llvm_builder = std::make_unique<llvm::IRBuilder<>>(*llvm_context);
    codegen::Builder builder{
        std::move(llvm_context),
        std::move(llvm_module),
        std::move(llvm_builder),
        {}
    };

    // Perform static analysis
    typecheck::State typecheck_state{};
    typecheck_state.binops = types::create_binops();
    typecheck_state.casts = types::create_casts();
    typecheck::Scope typecheck_scope{};
    for (auto& tls : statements) {
        std::cout << tls->formatted() << std::endl;
        tls->typecheck(typecheck_state, typecheck_scope);
    }
    if (typecheck_state.errors.size() > 0) {
        std::cerr << "Errors while typechecking:" << std::endl;
        for (auto& error : typecheck_state.errors) {
            // Stored positions are printed one-based.
            std::cerr << " " << error.what() << std::endl;
            std::cerr << " at " << error.m_meta.start.line + 1 << ":" << error.m_meta.start.col + 1;
            std::cerr << " to " << error.m_meta.end.line + 1 << ":" << error.m_meta.end.col + 1 << std::endl;
        }
        return {};
    }

    codegen::Scope cg_scope{
        .binops = typecheck_state.binops,
        .casts = typecheck_state.casts,
        .values = {},
        .is_lvalue = false,
    };

    // Compile parsed output
    try {
        for (auto& tls : statements) {
            std::cout << tls->formatted() << std::endl;
            tls->codegen(builder, cg_scope);
        }
    }
    catch (const CompileError& error) {
        std::cerr << "FATAL: " << error.what() << std::endl;
        std::cerr << " at " << error.m_meta.start.line + 1 << ":" << error.m_meta.start.col + 1;
        std::cerr << " to " << error.m_meta.end.line + 1 << ":" << error.m_meta.end.col + 1 << std::endl;
        return {};
    }

    // Prepare to print module as output
    llvm::InitializeAllTargetInfos();
    llvm::InitializeAllTargets();
    llvm::InitializeAllTargetMCs();
    llvm::InitializeAllAsmParsers();
    llvm::InitializeAllAsmPrinters();

    auto target_triple = llvm::sys::getDefaultTargetTriple();
    std::string lookup_error;
    auto target = llvm::TargetRegistry::lookupTarget(target_triple, lookup_error);
    // lookupTarget returns null and fills the error string when no registered
    // target matches the triple.
    if (!target) {
        llvm::errs() << "Unable to find target for " << target_triple << ": " << lookup_error << "\n";
        return {};
    }

    llvm::TargetOptions opt;
    // Own the machine so it is released on every return path.
    std::unique_ptr<llvm::TargetMachine> target_machine{
        target->createTargetMachine(llvm::Triple(target_triple), "generic", "", opt, llvm::Reloc::PIC_)
    };
    if (!target_machine) {
        llvm::errs() << "Unable to create target machine for " << target_triple << "\n";
        return {};
    }
    builder.mod->setDataLayout(target_machine->createDataLayout());
    builder.mod->setTargetTriple(target_machine->getTargetTriple());

    // Print output to string
    std::string llvm_ir_string;
    llvm::raw_string_ostream llvm_ir_dest{ llvm_ir_string };
    builder.mod->print(llvm_ir_dest, nullptr);
    llvm_ir_dest.flush();

    // Emit the object file into an in-memory buffer; addPassesToEmitFile needs
    // a stream that supports pwrite, which raw_svector_ostream provides.
    llvm::SmallVector<char, 0> obj_buffer;
    llvm::raw_svector_ostream obj_dest{ obj_buffer };
    llvm::legacy::PassManager pass{};
    if (target_machine->addPassesToEmitFile(pass, obj_dest, nullptr, llvm::CodeGenFileType::ObjectFile)) {
        llvm::errs() << "Target Machine can't emit file";
        return {};
    }
    pass.run(*builder.mod);

    return CompileOutput{
        std::move(llvm_ir_string),
        std::string(obj_buffer.data(), obj_buffer.size()),
    };
}
/// @brief Deleter that closes a popen() stream with pclose(); intended for
///        std::unique_ptr<FILE, ClosePipeDeleter>.
struct ClosePipeDeleter {
    /// Closes @p file. A null pointer is ignored; the value returned by
    /// pclose() is discarded.
    void operator()(FILE* file) const noexcept {
        if (file != nullptr) {
            pclose(file);
        }
    }
};
/// @brief Executes a given command and returns the output as std::string
///
/// The command is run through popen(cmd, "r"), so only what it writes to its
/// standard output is collected. The exit status is not reported.
///
/// @param cmd command line handed to popen()
/// @return everything read from the command's output, byte-for-byte
/// @throws std::runtime_error when popen() fails
std::string exec(const char* cmd) {
    std::unique_ptr<FILE, ClosePipeDeleter> pipe(popen(cmd, "r"), ClosePipeDeleter{});
    if (!pipe) {
        throw std::runtime_error("popen() failed!");
    }

    std::array<char, 128> buffer{};
    std::string result;
    // fread reports how many bytes it delivered, so output containing NUL
    // bytes is appended in full instead of being cut at the first NUL.
    std::size_t received = 0;
    while ((received = std::fread(buffer.data(), 1, buffer.size(), pipe.get())) > 0) {
        result.append(buffer.data(), received);
    }
    return result;
}
/// @brief Discovers the path of a given library using POSIX-util "whereis"
///
/// Runs `whereis <libname>` and takes the first whitespace-separated entry
/// after the first ':' on the first output line. The name echoed before the
/// colon is not assumed to have the same length as @p libname.
///
/// NOTE(review): @p libname is pasted into a shell command unescaped; only
/// pass trusted names.
///
/// @param libname file name to look up
/// @return path to the library, empty string if nothing was found
std::string find_lib(std::string libname) {
    const std::string cmd = "whereis " + libname;
    const std::string output = exec(cmd.c_str());

    // Only the first line is considered.
    const std::string line = output.substr(0, output.find('\n'));

    const auto colon = line.find(':');
    if (colon == std::string::npos) {
        return {};
    }
    const auto first = line.find_first_not_of(" \t", colon + 1);
    if (first == std::string::npos) {
        return {};  // "name:" with no results
    }
    const auto last = line.find_first_of(" \t", first);
    return line.substr(first, last == std::string::npos ? std::string::npos : last - first);
}
/// @brief Compiles "test.c" to "test.o" and links it into the executable "test".
///
/// Prints the generated LLVM IR, writes the object file, locates the dynamic
/// loader and the C runtime start files via find_lib(), then runs `ld`.
///
/// @return 0 when every step up to launching the linker succeeded, 1 otherwise.
///         The linker's own exit status is not available from exec().
int main() {
    const std::string in_filename{ "test.c" };
    const std::string out_filename{ "test.o" };
    const std::string exec_filename{ "test" };

    auto out = compile(in_filename);
    if (!out) {
        // compile() has already reported the reason.
        return 1;
    }

    // Print LLVM IR, produce Obj-file
    std::cout << out->llvm_ir_string << std::endl;
    if (write_file(out_filename, out->obj_string) != 0) {
        return 1;
    }

    try {
        // Find necessary libraries
        const std::string command = "ld";
        const auto linker = find_lib("ld-linux-x86-64.so.2");
        const auto crt1 = find_lib("crt1.o");
        const auto crti = find_lib("crti.o");
        const auto crtn = find_lib("crtn.o");
        if (linker.empty() || crt1.empty() || crti.empty() || crtn.empty()) {
            std::cerr << "Failed to locate the dynamic linker or C runtime start files" << std::endl;
            return 1;
        }

        // Link everything together and produce a file with exec_filename
        // NOTE(review): -lc precedes the object files here; confirm this order
        // links correctly on all intended toolchains.
        const std::string cmd = command + " -dynamic-linker " + linker + " -lc "
            + crt1 + " " + crti + " " + crtn + " "
            + out_filename + " -o " + exec_filename;
        std::cout << cmd << std::endl;
        std::cout << exec(cmd.c_str()) << std::endl;
    }
    catch (const std::exception& error) {
        // exec() throws when the helper process cannot be started.
        std::cerr << "Linking failed: " << error.what() << std::endl;
        return 1;
    }
    return 0;
}