Add simple tokenization

This commit is contained in:
Sofia 2026-04-01 21:59:07 +03:00
parent 721f996413
commit 92e0bcd270
2 changed files with 134 additions and 10 deletions

View File

@ -13,8 +13,139 @@
#include <llvm/IR/Verifier.h> #include <llvm/IR/Verifier.h>
#include <iostream> #include <iostream>
#include <fstream>
#include <cctype>
void llvm_hello_world();
// Reads the text file at `filepath` and returns its contents as one string.
//
// The file is read line by line and the lines are joined with '\n'. No
// newline is appended after the final line, so a trailing newline in the file
// is not preserved. Blank lines (including leading ones) are kept.
//
// If the file cannot be opened, a message is written to std::cerr and an
// empty string is returned.
std::string read_file(const char* filepath) {
    std::ifstream input{ filepath };
    if (!input) {
        std::cerr << "Failed to read " << filepath << std::endl;
        return {};
    }

    std::string out{};
    std::string currLine;
    // Track the first line explicitly: testing `out` for emptiness instead
    // would drop the separator after leading blank lines.
    bool firstLine = true;
    while (std::getline(input, currLine)) {
        if (!firstLine) {
            out += '\n';
        }
        firstLine = false;
        // Append in place; `out = out + currLine` would copy the whole buffer
        // for every line.
        out += currLine;
    }
    return out;
}
// Categories of token produced by the tokenizer.
enum class TokenType {
    Ident,          // identifier
    Symbol,         // single character that fits no other category
    LiteralInt,     // run of decimal digits
    ReturnKeyword,  // the keyword `return`
    Whitespace,     // run of whitespace characters
};

// A single token: its category plus the exact source text it covers.
struct Token {
    TokenType type;
    std::string content;

    // Returns a human-readable name for this token's category.
    // Const so that it can be called on const tokens and temporaries.
    std::string name() const {
        // No `default:` label, so the compiler can warn when a new TokenType
        // enumerator is added without a matching case here.
        switch (type) {
        case TokenType::Ident:
            return "Ident";
        case TokenType::Symbol:
            return "Symbol";
        case TokenType::LiteralInt:
            return "LiteralInt";
        case TokenType::ReturnKeyword:
            return "Return";
        case TokenType::Whitespace:
            return "Whitespace";
        }
        // Only reachable if `type` holds a value outside the enumerators.
        return "Unknown";
    }
};

// Writes `token` to `stream` as `Name(content)` and returns the stream so that
// insertions can be chained.
std::ostream& operator<<(std::ostream& stream, const Token& token) {
    stream << token.name() << "(" << token.content << ")";
    return stream;
}
// Returns true if `character` is a space, horizontal tab, line feed or
// carriage return.
//
// The argument is taken by value so that literals, const chars and
// temporaries can be passed as well as plain variables.
bool iswhitespace(char character) {
    switch (character) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
}
int main() { int main() {
std::string out{ read_file("test.c") };
std::cout << out << std::endl;
std::vector<Token> tokens{};
for (int i = 0; i < out.length();) {
char c = out[i];
if (std::isdigit(c)) {
std::string content{};
do {
content += c;
c = out[++i];
} while (std::isdigit(c));
tokens.push_back(Token{ TokenType::LiteralInt, content });
}
else if (std::isalpha(c)) {
std::string content{};
do {
content += c;
c = out[++i];
} while (std::isalnum(c));
TokenType type = TokenType::Ident;
if (content == "return") {
type = TokenType::ReturnKeyword;
}
tokens.push_back(Token{ type, content });
}
else if (iswhitespace(c)) {
std::string content{};
do {
content += c;
c = out[++i];
} while (iswhitespace(c));
tokens.push_back(Token{ TokenType::Whitespace, content });
}
else {
tokens.push_back(Token{ TokenType::Symbol, std::string{c} });
i++;
}
}
for (Token token : tokens) {
std::cout << token << std::endl;
}
// LLVM Hello World
// llvm_hello_world();
std::cout << "hello" << std::endl;
return 0;
}
void llvm_hello_world() {
auto Context = std::make_unique<llvm::LLVMContext>(); auto Context = std::make_unique<llvm::LLVMContext>();
auto Module = std::make_unique<llvm::Module>("my cool module", *Context); auto Module = std::make_unique<llvm::Module>("my cool module", *Context);
@ -46,14 +177,6 @@ int main() {
auto TargetMachine = Target->createTargetMachine(TargetTriple, "generic", "", opt, llvm::Reloc::PIC_); auto TargetMachine = Target->createTargetMachine(TargetTriple, "generic", "", opt, llvm::Reloc::PIC_);
Module->setDataLayout(TargetMachine->createDataLayout()); Module->setDataLayout(TargetMachine->createDataLayout());
// auto Filename = "output.o";
// std::error_code EC;
// llvm::raw_fd_ostream dest(Filename, EC, llvm::sys::fs::OpenFlags::OF_Text);
// if (EC) {
// llvm::errs() << "Could not open file: " << EC.message();
// return 1;
// }
std::string out; std::string out;
llvm::raw_string_ostream dest{ out }; llvm::raw_string_ostream dest{ out };
@ -61,6 +184,4 @@ int main() {
dest.flush(); dest.flush();
std::cout << out << std::endl; std::cout << out << std::endl;
std::cout << "hello" << std::endl;
return 0;
} }

3
test.c Normal file
View File

@ -0,0 +1,3 @@
int main() {
return 5;
}