Move tokens to their own file

2026-04-01 22:13:23 +03:00 · 2026-04-01 22:13:23 +03:00 · ad04da8218
commit ad04da8218
parent 92e0bcd270
4 changed files with 121 additions and 91 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -15,7 +15,7 @@ separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS})
 add_definitions(${LLVM_DEFINITIONS_LIST})
 # Executable
-add_executable(${PROJECT_NAME} src/main.cpp)
+add_executable(${PROJECT_NAME} src/main.cpp src/tokens.cpp)
 target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_20)
 # Find the libraries that correspond to the LLVM components
--- a/src/main.cpp
+++ b/src/main.cpp
@ -16,6 +16,8 @@
 #include <fstream>
 #include <cctype>
 #include "tokens.h"
 void llvm_hello_world();
 std::string read_file(char* filepath) {
@ -37,105 +39,17 @@ std::string read_file(char* filepath) {
    return out;
 }
 // Categories assigned to tokens by the tokenizing loop in main().
 enum class TokenType {
    // A letter followed by any number of letters or digits (other than "return").
    Ident,
    // Any single character that is not a digit, a letter or whitespace.
    Symbol,
    // A run of one or more decimal digits.
    LiteralInt,
    // The identifier-shaped word "return".
    ReturnKeyword,
    // A run of spaces, tabs, line feeds and/or carriage returns.
    Whitespace,
 };
 // A single lexical unit: its category plus the exact text it was built from.
 struct Token {
    TokenType type;
    std::string content;

    // Printable label for `type`. ReturnKeyword is labelled "Return"; a value
    // that matches no enumerator is labelled "Unknown".
    std::string name() {
        const char* label = "Unknown";
        switch (type) {
        case TokenType::Ident:
            label = "Ident";
            break;
        case TokenType::Symbol:
            label = "Symbol";
            break;
        case TokenType::LiteralInt:
            label = "LiteralInt";
            break;
        case TokenType::ReturnKeyword:
            label = "Return";
            break;
        case TokenType::Whitespace:
            label = "Whitespace";
            break;
        }
        return label;
    }
 };
 // Prints a token as `<name>(<content>)` and hands the stream back for chaining.
 std::ostream& operator<<(std::ostream& stream, Token& token) {
    return stream << token.name() << "(" << token.content << ")";
 }
 // True when `character` is a space, tab, line feed or carriage return.
 bool iswhitespace(char& character) {
    switch (character) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
 }
 int main() {
    std::string out{ read_file("test.c") };
    std::cout << out << std::endl;
-    std::vector<Token> tokens{};
+    auto tokens = token::tokenize(out);
-
+    for (token::Token token : tokens) {
    for (int i = 0; i < out.length();) {
        char c = out[i];
        if (std::isdigit(c)) {
            std::string content{};
            do {
                content += c;
                c = out[++i];
            } while (std::isdigit(c));
            tokens.push_back(Token{ TokenType::LiteralInt, content });
        }
        else if (std::isalpha(c)) {
            std::string content{};
            do {
                content += c;
                c = out[++i];
            } while (std::isalnum(c));
            TokenType type = TokenType::Ident;
            if (content == "return") {
                type = TokenType::ReturnKeyword;
            }
            tokens.push_back(Token{ type, content });
        }
        else if (iswhitespace(c)) {
            std::string content{};
            do {
                content += c;
                c = out[++i];
            } while (iswhitespace(c));
            tokens.push_back(Token{ TokenType::Whitespace, content });
        }
        else {
            tokens.push_back(Token{ TokenType::Symbol, std::string{c} });
            i++;
        }
    }
    for (Token token : tokens) {
        std::cout << token << std::endl;
    }
    // LLVM Hello World
    // llvm_hello_world();
--- a/src/tokens.cpp
+++ b/src/tokens.cpp
@ -0,0 +1,85 @@
 #include "tokens.h"
 #include <string>
 #include <cctype>
 #include <vector>
 #include <iostream>
 // Reports whether `character` is one of the characters the tokenizer groups
 // into Whitespace tokens: space, tab, line feed or carriage return.
 //
 // The argument is taken by value: it is only read, and a by-value `char`
 // also accepts const objects and temporaries, which a non-const reference
 // rejects.
 static bool iswhitespace(char character) {
    return character == ' '
        || character == '\t'
        || character == '\n'
        || character == '\r';
 }
 namespace token {
    /// Returns a printable label for this token's type.
    ///
    /// ReturnKeyword is labelled "Return"; a `type` value that matches no
    /// enumerator yields "Unknown".
    std::string Token::name() {
        switch (type) {
        case token::Type::Ident:
            return "Ident";
        case token::Type::Symbol:
            return "Symbol";
        case token::Type::LiteralInt:
            return "LiteralInt";
        case token::Type::ReturnKeyword:
            return "Return";
        case token::Type::Whitespace:
            return "Whitespace";
        default:
            return "Unknown";
        }
    }

    /// Writes `token` to `stream` as `<name>(<content>)` and returns `stream`.
    std::ostream& operator<<(std::ostream& stream, Token& token) {
        stream << token.name() << "(" << token.content << ")";
        return stream;
    }

    /// Splits `text` into tokens, in input order, covering every character.
    ///
    /// - a run of decimal digits becomes one LiteralInt;
    /// - a letter followed by letters/digits becomes one Ident, or a
    ///   ReturnKeyword when the word is exactly "return";
    /// - a run of space/tab/LF/CR becomes one Whitespace;
    /// - any other character becomes a one-character Symbol.
    ///
    /// Each token stores its own copy of the matched text, so the result does
    /// not refer back to `text`. An empty `text` yields an empty vector.
    std::vector<token::Token> tokenize(std::string_view text) {
        using size_type = std::string_view::size_type;

        // The <cctype> predicates have undefined behaviour for negative
        // values, so go through unsigned char first.
        const auto is_digit = [](char ch) {
            return std::isdigit(static_cast<unsigned char>(ch)) != 0;
        };
        const auto is_alpha = [](char ch) {
            return std::isalpha(static_cast<unsigned char>(ch)) != 0;
        };
        const auto is_alnum = [](char ch) {
            return std::isalnum(static_cast<unsigned char>(ch)) != 0;
        };

        std::vector<token::Token> tokens{};
        const size_type length = text.length();
        size_type i = 0;
        while (i < length) {
            const size_type start = i;
            char c = text[i];
            token::Type type = token::Type::Symbol;

            // Every scan below checks `i < length` before indexing: reading a
            // string_view at index size() is undefined behaviour, and a run
            // may extend to the very end of the input.
            if (is_digit(c)) {
                type = token::Type::LiteralInt;
                do {
                    ++i;
                } while (i < length && is_digit(text[i]));
            }
            else if (is_alpha(c)) {
                type = token::Type::Ident;
                do {
                    ++i;
                } while (i < length && is_alnum(text[i]));
            }
            else if (iswhitespace(c)) {
                type = token::Type::Whitespace;
                // Copy each character into `c` before testing it so the call
                // also works with a helper that takes a non-const reference.
                for (++i; i < length; ++i) {
                    c = text[i];
                    if (!iswhitespace(c)) {
                        break;
                    }
                }
            }
            else {
                // Anything else is a single-character symbol.
                ++i;
            }

            std::string content(text.substr(start, i - start));
            if (type == token::Type::Ident && content == "return") {
                type = token::Type::ReturnKeyword;
            }
            tokens.push_back(token::Token{ type, content });
        }
        return tokens;
    }
 }
--- a/src/tokens.h
+++ b/src/tokens.h
@ -0,0 +1,31 @@
 #ifndef TOKENS_H
 #define TOKENS_H
 #include <string>
 #include <vector>
 #include <iostream>
 // Token data types and the tokenizer entry point.
 namespace token {
    // Categories a Token can belong to.
    enum class Type {
        // A letter followed by any number of letters or digits (other than "return").
        Ident,
        // Any single character that is not a digit, a letter or whitespace.
        Symbol,
        // A run of one or more decimal digits.
        LiteralInt,
        // The identifier-shaped word "return".
        ReturnKeyword,
        // A run of spaces, tabs, line feeds and/or carriage returns.
        Whitespace,
    };
    // A single lexical unit: its category plus an owned copy of its text.
    struct Token {
        Type type;
        std::string content;
        // Printable label for `type` ("Return" for ReturnKeyword, "Unknown"
        // for a value that matches no enumerator).
        std::string name();
    };
    // Writes `token` to `stream` as `<name>(<content>)` and returns `stream`.
    std::ostream& operator<<(std::ostream& stream, Token& token);
    // Splits `text` into tokens in input order; whitespace runs are kept as
    // Whitespace tokens rather than dropped.
    std::vector<Token> tokenize(std::string_view text);
 }
 #endif