diff --git a/CMakeLists.txt b/CMakeLists.txt index 3545b8c..667c981 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,7 @@ separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS}) add_definitions(${LLVM_DEFINITIONS_LIST}) # Executable -add_executable(${PROJECT_NAME} src/main.cpp) +add_executable(${PROJECT_NAME} src/main.cpp src/tokens.cpp) target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_20) # Find the libraries that correspond to the LLVM components diff --git a/src/main.cpp b/src/main.cpp index bc0daa2..4cc6bf4 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -16,6 +16,8 @@ #include #include +#include "tokens.h" + void llvm_hello_world(); std::string read_file(char* filepath) { @@ -37,105 +39,17 @@ std::string read_file(char* filepath) { return out; } -enum class TokenType { - Ident, - Symbol, - LiteralInt, - - ReturnKeyword, - - Whitespace, -}; - -struct Token { - TokenType type; - std::string content; - - std::string name() { - switch (type) { - case TokenType::Ident: - return "Ident"; - case TokenType::Symbol: - return "Symbol"; - case TokenType::LiteralInt: - return "LiteralInt"; - - case TokenType::ReturnKeyword: - return "Return"; - - case TokenType::Whitespace: - return "Whitespace"; - - default: - return "Unknown"; - } - } -}; - -std::ostream& operator<<(std::ostream& stream, Token& token) { - stream << token.name() << "(" << token.content << ")"; - return stream; -} - -bool iswhitespace(char& character) { - return character == ' ' - || character == '\t' - || character == '\n' - || character == '\r'; -} - int main() { std::string out{ read_file("test.c") }; std::cout << out << std::endl; - std::vector tokens{}; - - for (int i = 0; i < out.length();) { - char c = out[i]; - - if (std::isdigit(c)) { - std::string content{}; - do { - content += c; - c = out[++i]; - } while (std::isdigit(c)); - tokens.push_back(Token{ TokenType::LiteralInt, content }); - } - else if (std::isalpha(c)) { - std::string content{}; - do { - content += c; - c = out[++i]; - } while (std::isalnum(c)); - - TokenType type = TokenType::Ident; - if (content == "return") { - type = TokenType::ReturnKeyword; - } - tokens.push_back(Token{ type, content }); - } - else if (iswhitespace(c)) { - std::string content{}; - do { - content += c; - c = out[++i]; - } while (iswhitespace(c)); - tokens.push_back(Token{ TokenType::Whitespace, content }); - } - else { - tokens.push_back(Token{ TokenType::Symbol, std::string{c} }); - i++; - } - } - - for (Token token : tokens) { + auto tokens = token::tokenize(out); + for (token::Token token : tokens) { std::cout << token << std::endl; } - - // LLVM Hello World // llvm_hello_world(); diff --git a/src/tokens.cpp b/src/tokens.cpp new file mode 100644 index 0000000..145000a --- /dev/null +++ b/src/tokens.cpp @@ -0,0 +1,85 @@ +#include "tokens.h" + +#include +#include +#include +#include + + +static bool iswhitespace(char& character) { + return character == ' ' + || character == '\t' + || character == '\n' + || character == '\r'; +} + +namespace token { + std::string Token::name() { + switch (type) { + case token::Type::Ident: + return "Ident"; + case token::Type::Symbol: + return "Symbol"; + case token::Type::LiteralInt: + return "LiteralInt"; + + case token::Type::ReturnKeyword: + return "Return"; + + case token::Type::Whitespace: + return "Whitespace"; + + default: + return "Unknown"; + } + } + + std::ostream& operator<<(std::ostream& stream, Token& token) { + stream << token.name() << "(" << token.content << ")"; + return stream; + } + + std::vector tokenize(std::string_view text) { + std::vector tokens{}; + + for (int i = 0; i < text.length();) { + char c = text[i]; + + if (std::isdigit(c)) { + std::string content{}; + do { + content += c; + c = text[++i]; + } while (std::isdigit(c)); + tokens.push_back(token::Token{ token::Type::LiteralInt, content }); + } + else if (std::isalpha(c)) { + std::string content{}; + do { + content += c; + c = text[++i]; + } while (std::isalnum(c)); + + token::Type type = token::Type::Ident; + if (content == "return") { + type = token::Type::ReturnKeyword; + } + tokens.push_back(token::Token{ type, content }); + } + else if (iswhitespace(c)) { + std::string content{}; + do { + content += c; + c = text[++i]; + } while (iswhitespace(c)); + tokens.push_back(token::Token{ token::Type::Whitespace, content }); + } + else { + tokens.push_back(token::Token{ token::Type::Symbol, std::string{c} }); + i++; + } + } + + return tokens; + } +} \ No newline at end of file diff --git a/src/tokens.h b/src/tokens.h new file mode 100644 index 0000000..c2e33a5 --- /dev/null +++ b/src/tokens.h @@ -0,0 +1,31 @@ +#ifndef TOKENS_H +#define TOKENS_H + +#include +#include +#include + +namespace token { + enum class Type { + Ident, + Symbol, + LiteralInt, + + ReturnKeyword, + + Whitespace, + }; + + struct Token { + Type type; + std::string content; + + std::string name(); + }; + + std::ostream& operator<<(std::ostream& stream, Token& token); + + std::vector tokenize(std::string_view text); +} + +#endif \ No newline at end of file