#include "tokens.h"

// NOTE(review): the standard headers below are reconstructed from the names
// this file uses — confirm against the project's original include list.
#include <algorithm>
#include <cctype>
#include <cstdint>
#include <iostream>
#include <optional>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>

/// Returns true when `character` is a space, tab, line feed or carriage return.
static bool iswhitespace(char character) {
    switch (character) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
}

/// Decodes the text that follows a backslash inside a string literal.
///
/// Recognised sequences are `n`, `r`, `t`, `\` and one to three octal digits.
///
/// @param inspected The characters after the backslash (backslash excluded).
/// @return The decoded character, or an empty optional when `inspected` is
///         empty, longer than three characters, or not a recognised sequence.
static std::optional<char> get_escaped(std::string_view inspected) {
    if (inspected == "n") return '\n';
    if (inspected == "r") return '\r';
    if (inspected == "t") return '\t';
    if (inspected == "\\") return '\\';

    // Anything else must be a 1-3 digit octal number.
    if (inspected.empty() || inspected.size() > 3) return std::nullopt;

    unsigned int value = 0;
    for (const char digit : inspected) {
        // Only 0-7 are octal digits; 8 and 9 must be rejected.
        if (digit < '0' || digit > '7') return std::nullopt;
        value = value * 8 + static_cast<unsigned int>(digit - '0');
    }
    // Values above 0377 do not fit in one byte and are narrowed by the cast.
    return static_cast<char>(value);
}

namespace token {

/// Returns the display name of a token type, or "Unknown" for a value that
/// has no case in the switch below.
std::string type_name(Type& type) {
    switch (type) {
    case Type::Ident:         return "Ident";
    case Type::Symbol:        return "Symbol";
    case Type::LiteralInt:    return "LiteralInt";
    case Type::LiteralStr:    return "LiteralStr";
    case Type::ReturnKeyword: return "Return";
    case Type::IfKeyword:     return "If";
    case Type::ElseKeyword:   return "Else";
    case Type::Whitespace:    return "Whitespace";
    case Type::Eof:           return "EOF";
    default:                  return "Unknown";
    }
}

/// Renders the token as `Name(content) at line L col C`.
///
/// One is added to the stored start line and column before printing.
std::string Token::formatted() {
    std::ostringstream out;
    out << type_name(this->type) << "(" << this->content << ")"
        << " at line " << this->metadata.start.line + 1
        << " col " << this->metadata.start.col + 1;
    return out.str();
}

/// Writes `token.formatted()` to `stream` and returns the stream.
std::ostream& operator<<(std::ostream& stream, Token& token) {
    stream << token.formatted();
    return stream;
}

/// Merges two source ranges into one that covers both.
///
/// The result starts at the earlier of the two start positions and ends at
/// the later of the two end positions, comparing by line first and column
/// second. The filename is taken from `meta`.
Metadata operator+(Metadata meta, Metadata other) {
    const auto precedes = [](const Position& lhs, const Position& rhs) {
        return lhs.line < rhs.line || (lhs.line == rhs.line && lhs.col < rhs.col);
    };
    return Metadata{
        precedes(other.start, meta.start) ? other.start : meta.start,
        precedes(meta.end, other.end) ? other.end : meta.end,
        meta.filename
    };
}

/// Returns a copy of `meta` whose end column is advanced by `length`.
/// The start position, end line and filename are unchanged.
Metadata operator+(Metadata& meta, int length) {
    return Metadata{
        meta.start,
        Position{ meta.end.line, meta.end.col + length },
        meta.filename,
    };
}

TokenStream::TokenStream(std::vector& tokens) : m_tokens{ tokens }, m_position{ 0 } { }; Token TokenStream::peek(int length) { int new_pos = m_position + length; if (new_pos < 0 || new_pos > static_cast(m_tokens.size())) { return Token{ Type::Eof, {}, {} }; } return m_tokens[new_pos]; } Token TokenStream::peek() { return this->peek(0); } Token TokenStream::next() { token::Token got = this->peek(0); m_position++; while (m_position < static_cast(m_tokens.size()) && this->peek().type == Type::Whitespace) { m_position++; } return got; } Token TokenStream::expect(Type type) { auto next = this->next(); if (next.type == type) { return next; } throw std::runtime_error("Expected " + type_name(type) + ", got " + next.formatted()); } Token TokenStream::expect(Type type, std::string_view content) { auto next = this->next(); if (next.type == type && next.content == content) { return next; } throw std::runtime_error("Expected " + type_name(type) + "(" + std::string{ content } + "), got " + next.formatted()); } Metadata TokenStream::metadata() { return this->peek(0).metadata; } std::vector tokenize(std::string_view text, std::string filename) { std::vector tokens{}; uint32_t line = 0; uint32_t line_start = 0; int text_length = static_cast(text.length()); for (int i = 0; i < text_length;) { Position position{ line, i - line_start }; Metadata meta{ position, position, filename }; char c = text[i]; if (std::isdigit(c)) { std::string content{}; do { content += c; if ((i + 1) >= text_length) break; c = text[++i]; } while (std::isdigit(c)); tokens.push_back(token::Token{ token::Type::LiteralInt, content, meta + content.size() }); } else if (c == '\"') { std::string content{}; c = text[++i]; // Skip initial " do { if (c == '\\') { std::string escaped_content{}; if ((i + 1) >= text_length) break; auto potential = get_escaped(escaped_content + text[++i]); while (potential.has_value() && (i + 1) < text_length) { escaped_content += text[i]; potential = get_escaped(escaped_content + 
text[++i]); } if (escaped_content.size() > 0) { auto escaped = get_escaped(escaped_content); if (escaped.has_value()) content += *escaped; } c = text[i]; } else { content += c; if ((i + 1) >= text_length) break; c = text[++i]; } } while (c != '\"'); i++; // Skip second " tokens.push_back(token::Token{ token::Type::LiteralStr, content, meta + (content.size() + 2) }); } else if (std::isalpha(c) || c == '_') { std::string content{}; do { content += c; if ((i + 1) >= text_length) break; c = text[++i]; } while (std::isalnum(c) || c == '_'); token::Type type = token::Type::Ident; if (content == "return") { type = token::Type::ReturnKeyword; } else if (content == "if") { type = token::Type::IfKeyword; } else if (content == "else") { type = token::Type::ElseKeyword; } tokens.push_back(token::Token{ type, content, meta + content.size() }); } else if (iswhitespace(c)) { std::string content{}; do { if (c == '\n') { line++; line_start = i + 1; } content += c; if ((i + 1) >= text_length) break; c = text[++i]; } while (iswhitespace(c)); tokens.push_back(token::Token{ token::Type::Whitespace, content, meta + content.size() }); } else { tokens.push_back(token::Token{ token::Type::Symbol, std::string{c}, meta }); i++; } } Position position{ line, static_cast(text.length()) - line_start }; Metadata meta{ position, position, filename }; tokens.push_back(token::Token{ token::Type::Eof, {}, meta }); return tokens; } }