#include "tokens.h"

#include <algorithm>
#include <cctype>
#include <cstdint>
#include <ostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>

/// Returns true for the four characters the tokenizer treats as whitespace:
/// space, tab, line feed and carriage return.
static bool iswhitespace(char character) {
    return character == ' ' || character == '\t' || character == '\n' || character == '\r';
}

// The <cctype> classifiers have undefined behaviour for negative arguments
// other than EOF, so plain `char` values are routed through `unsigned char`.

/// Returns true if `character` is a decimal digit.
static bool is_digit(char character) {
    return std::isdigit(static_cast<unsigned char>(character)) != 0;
}

/// Returns true if `character` is alphabetic.
static bool is_alpha(char character) {
    return std::isalpha(static_cast<unsigned char>(character)) != 0;
}

/// Returns true if `character` is alphabetic or a decimal digit.
static bool is_alnum(char character) {
    return std::isalnum(static_cast<unsigned char>(character)) != 0;
}

namespace token {

    /// Returns true if `lhs` comes strictly before `rhs`, comparing the line
    /// first and the column second.
    static bool position_before(const Position& lhs, const Position& rhs) {
        return lhs.line < rhs.line || (lhs.line == rhs.line && lhs.col < rhs.col);
    }

    /// Returns the display name of a token type, or "Unknown" for a value that
    /// is not one of the handled enumerators.
    std::string type_name(Type& type) {
        switch (type) {
        case token::Type::Ident:
            return "Ident";
        case token::Type::Symbol:
            return "Symbol";
        case token::Type::LiteralInt:
            return "LiteralInt";
        case token::Type::LiteralStr:
            return "LiteralStr";
        case token::Type::ReturnKeyword:
            return "Return";
        case token::Type::IfKeyword:
            return "If";
        case token::Type::ElseKeyword:
            return "Else";
        case token::Type::Whitespace:
            return "Whitespace";
        case token::Type::Eof:
            return "EOF";
        default:
            return "Unknown";
        }
    }

    /// Formats the token as `Type(content) at line L col C`.
    /// Stored positions are zero-based; the printed ones are one-based.
    std::string Token::formatted() {
        std::stringstream out{ "" };
        out << type_name(this->type);
        out << "(" << this->content << ")";
        out << " at line " << this->metadata.start.line + 1
            << " col " << this->metadata.start.col + 1;
        return out.str();
    }

    /// Streams the token using Token::formatted().
    std::ostream& operator<<(std::ostream& stream, Token& token) {
        stream << token.formatted();
        return stream;
    }

    /// Returns the span covering both operands: it starts at the earlier of the
    /// two start positions and ends at the later of the two end positions.
    /// The filename is taken from `meta`.
    Metadata operator+(Metadata meta, Metadata other) {
        // Positions are compared as (line, col) pairs. Taking min/max of line
        // and column independently would produce a position that belongs to
        // neither operand when the two lie on different lines.
        const Position& start = position_before(other.start, meta.start) ? other.start : meta.start;
        const Position& end = position_before(meta.end, other.end) ? other.end : meta.end;
        return Metadata{ start, end, meta.filename };
    }

    /// Returns a copy of `meta` whose end column is moved right by `length`.
    Metadata operator+(Metadata& meta, int length) {
        return Metadata{
            meta.start,
            Position {meta.end.line, meta.end.col + length},
            meta.filename,
        };
    }

    /// Creates a stream over `tokens`, positioned at the first token.
    TokenStream::TokenStream(std::vector<Token>& tokens)
        : m_tokens{ tokens }, m_position{ 0 } {
    }

    /// Returns the token `length` places from the current position without
    /// consuming anything. Offsets outside the token list yield an Eof token
    /// with empty content and metadata.
    Token TokenStream::peek(int length) {
        int new_pos = m_position + length;
        // `>=` rather than `>`: an index equal to size() is already one past
        // the last valid element.
        if (new_pos < 0 || new_pos >= static_cast<int>(m_tokens.size())) {
            return Token{ Type::Eof, {}, {} };
        }
        return m_tokens[new_pos];
    }

    /// Returns the token at the current position without consuming it.
    Token TokenStream::peek() {
        return this->peek(0);
    }

    /// Returns the token at the current position, then advances past it and
    /// past any Whitespace tokens that directly follow it.
    Token TokenStream::next() {
        token::Token got = this->peek(0);
        m_position++;
        while (m_position < static_cast<int>(m_tokens.size())
            && this->peek().type == Type::Whitespace) {
            m_position++;
        }
        return got;
    }

    /// Consumes the next token and returns it if it has the given type.
    /// @throws std::runtime_error if the type differs.
    Token TokenStream::expect(Type type) {
        auto next = this->next();
        if (next.type == type) {
            return next;
        }
        throw std::runtime_error("Expected " + type_name(type) + ", got " + next.formatted());
    }

    /// Consumes the next token and returns it if both its type and its content
    /// match.
    /// @throws std::runtime_error if either differs.
    Token TokenStream::expect(Type type, std::string_view content) {
        auto next = this->next();
        if (next.type == type && next.content == content) {
            return next;
        }
        throw std::runtime_error("Expected " + type_name(type) + "(" + std::string{ content } + "), got " + next.formatted());
    }

    /// Returns the metadata of the token at the current position.
    Metadata TokenStream::metadata() {
        return this->peek(0).metadata;
    }

    /// Splits `text` into tokens and appends a final Eof token.
    ///
    /// Recognised forms: runs of digits (LiteralInt), double-quoted strings
    /// (LiteralStr, content excludes the quotes, no escape handling), a letter
    /// followed by letters/digits (Ident, or a keyword type for `return`, `if`
    /// and `else`), runs of whitespace (Whitespace), and any other single
    /// character (Symbol).
    ///
    /// Every scanner leaves `i` on the first character it did not consume, so
    /// the outer loop always makes progress, including at the end of input.
    ///
    /// @param text     source text to tokenize
    /// @param filename stored in every token's metadata
    /// @return the tokens in source order, ending with Eof
    std::vector<Token> tokenize(std::string_view text, std::string filename) {
        std::vector<Token> tokens{};

        uint32_t line = 0;
        uint32_t line_start = 0; // index in `text` of the first character of the current line
        int text_length = static_cast<int>(text.length());

        for (int i = 0; i < text_length;) {
            Position position{ line, i - line_start };
            Metadata meta{ position, position, filename };
            char c = text[i];

            if (is_digit(c)) {
                std::string content{};
                while (i < text_length && is_digit(text[i])) {
                    content += text[i++];
                }
                tokens.push_back(token::Token{ token::Type::LiteralInt, content, meta + static_cast<int>(content.size()) });
            }
            else if (c == '\"') {
                std::string content{};
                i++; // Skip opening "
                while (i < text_length && text[i] != '\"') {
                    if (text[i] == '\n') {
                        // Keep positions of later tokens correct when a
                        // literal spans several lines.
                        line++;
                        line_start = static_cast<uint32_t>(i) + 1;
                    }
                    content += text[i++];
                }
                // An unterminated literal simply ends at the end of input.
                const bool terminated = i < text_length;
                if (terminated) {
                    i++; // Skip closing "
                }
                const int quotes = terminated ? 2 : 1;
                tokens.push_back(token::Token{ token::Type::LiteralStr, content, meta + (static_cast<int>(content.size()) + quotes) });
            }
            else if (is_alpha(c)) {
                std::string content{};
                while (i < text_length && is_alnum(text[i])) {
                    content += text[i++];
                }

                token::Type type = token::Type::Ident;
                if (content == "return") {
                    type = token::Type::ReturnKeyword;
                }
                else if (content == "if") {
                    type = token::Type::IfKeyword;
                }
                else if (content == "else") {
                    type = token::Type::ElseKeyword;
                }
                tokens.push_back(token::Token{ type, content, meta + static_cast<int>(content.size()) });
            }
            else if (iswhitespace(c)) {
                std::string content{};
                while (i < text_length && iswhitespace(text[i])) {
                    if (text[i] == '\n') {
                        line++;
                        line_start = static_cast<uint32_t>(i) + 1;
                    }
                    content += text[i++];
                }
                tokens.push_back(token::Token{ token::Type::Whitespace, content, meta + static_cast<int>(content.size()) });
            }
            else {
                tokens.push_back(token::Token{ token::Type::Symbol, std::string{c}, meta });
                i++;
            }
        }

        // The Eof token sits just past the last character of the input.
        Position position{ line, text_length - line_start };
        Metadata meta{ position, position, filename };
        tokens.push_back(token::Token{ token::Type::Eof, {}, meta });

        return tokens;
    }
}