Add metadata to tokens

This commit is contained in:
Sofia 2026-04-10 23:40:59 +03:00
parent 07b691ba7a
commit d902569f7b
4 changed files with 74 additions and 11 deletions

32
.vscode/tasks.json vendored Normal file
View File

@ -0,0 +1,32 @@
{
    "version": "2.0.0",
    "tasks": [
        {
            "type": "cppbuild",
            "label": "C/C++: gcc build active file",
            "command": "/usr/bin/g++",
            "args": [
                "-fdiagnostics-color=always",
                "-g",
                "${file}",
                "-o",
                "${fileDirname}/${fileBasenameNoExtension}",
                "-Wall",
                "-Weffc++",
                "-Wextra",
                "-Werror"
            ],
            "options": {
                "cwd": "${fileDirname}"
            },
            "problemMatcher": [
                "$gcc"
            ],
            "group": {
                "kind": "build",
                "isDefault": true
            },
            "detail": "compiler: /usr/bin/g++"
        }
    ]
}

View File

@ -49,7 +49,7 @@ int main() {
std::cout << out << std::endl;
auto tokens = token::tokenize(out);
auto tokens = token::tokenize(out, filename);
for (token::Token token : tokens) {
std::cout << token << std::endl;
}
@ -88,7 +88,7 @@ int main() {
tls->codegen(builder, scope);
}
}
catch (std::runtime_error error) {
catch (std::runtime_error& error) {
std::cerr << "FATAL: " << error.what() << std::endl;
return 1;
}

View File

@ -4,6 +4,7 @@
#include <cctype>
#include <vector>
#include <iostream>
#include <sstream>
static bool iswhitespace(char& character) {
@ -14,7 +15,6 @@ static bool iswhitespace(char& character) {
}
namespace token {
std::string type_name(Type& type) {
switch (type) {
case token::Type::Ident:
@ -43,7 +43,13 @@ namespace token {
}
/// Renders the token as `<type name>(<content>) at line <L> col <C>`.
///
/// The stored position is zero-based; one is added to both the line and the
/// column so the printed location matches what editors display.
///
/// @return The human-readable description of this token.
std::string Token::formatted() {
    // An output-only stream is sufficient here and starts out empty.
    std::ostringstream out;
    out << type_name(this->type) << "(" << this->content << ")"
        << " at line " << this->metadata.position.line + 1
        << " col " << this->metadata.position.col + 1;
    return out.str();
}
std::ostream& operator<<(std::ostream& stream, Token& token) {
@ -58,7 +64,7 @@ namespace token {
/// Returns the token `length` positions away from the current one.
///
/// The stream position itself is not modified by this call.
///
/// @param length Signed offset relative to the current position.
/// @return A copy of the token at that offset, or an `Eof` token with empty
///         content and metadata when the offset falls outside the stream.
Token TokenStream::peek(int length) {
    const int new_pos = m_position + length;
    // `size()` is already one past the last valid index, so it has to be
    // rejected too; indexing it would read out of bounds.
    if (new_pos < 0 || new_pos >= static_cast<int>(m_tokens.size())) {
        return Token{ Type::Eof, {}, {} };
    }
    return m_tokens[new_pos];
}
@ -89,10 +95,16 @@ namespace token {
throw std::runtime_error("Expected " + type_name(type) + "(" + std::string{ content } + "), got " + next.formatted());
}
std::vector<token::Token> tokenize(std::string_view text) {
std::vector<token::Token> tokenize(std::string_view text, std::string filename) {
std::vector<token::Token> tokens{};
uint32_t line = 0;
uint32_t line_start = 0;
for (int i = 0; i < static_cast<int>(text.length());) {
Position position{ line, i - line_start };
Metadata meta{ position, filename };
char c = text[i];
if (std::isdigit(c)) {
@ -101,7 +113,7 @@ namespace token {
content += c;
c = text[++i];
} while (std::isdigit(c));
tokens.push_back(token::Token{ token::Type::LiteralInt, content });
tokens.push_back(token::Token{ token::Type::LiteralInt, content, meta });
}
else if (std::isalpha(c)) {
std::string content{};
@ -120,11 +132,15 @@ namespace token {
else if (content == "else") {
type = token::Type::ElseKeyword;
}
tokens.push_back(token::Token{ type, content });
tokens.push_back(token::Token{ type, content, meta });
}
else if (iswhitespace(c)) {
std::string content{};
do {
if (c == '\n') {
line++;
line_start = i + 1;
}
content += c;
c = text[++i];
} while (iswhitespace(c));
@ -132,12 +148,15 @@ namespace token {
// Just skip whitespace tokens
}
else {
tokens.push_back(token::Token{ token::Type::Symbol, std::string{c} });
tokens.push_back(token::Token{ token::Type::Symbol, std::string{c}, meta });
i++;
}
}
tokens.push_back(token::Token{ token::Type::Eof, {} });
Position position{ line, static_cast<uint32_t>(text.length()) - line_start };
Metadata meta{ position, filename };
tokens.push_back(token::Token{ token::Type::Eof, {}, meta });
return tokens;
}

View File

@ -4,6 +4,7 @@
#include <string>
#include <vector>
#include <iostream>
#include <cstdint>
namespace token {
enum class Type {
@ -22,9 +23,20 @@ namespace token {
std::string type_name(Type& type);
/// Location of a character inside a source text.
///
/// Both members default to zero so that a default-initialized `Position`
/// never carries indeterminate values. The struct remains an aggregate, so
/// `Position{ line, col }` keeps working.
struct Position {
    /// Line index (the tokenizer counts from zero).
    uint32_t line{ 0 };
    /// Column index within the line (the tokenizer counts from zero).
    uint32_t col{ 0 };
};
/// Provenance information attached to a token.
///
/// The members carry default initializers so that a default-initialized
/// `Metadata` has a fully defined position. The struct remains an aggregate,
/// so `Metadata{ position, filename }` keeps working.
struct Metadata {
    /// Where the token starts in the source text.
    Position position{};
    /// Name of the file the text was read from.
    std::string filename{};
};
// A single lexical token together with the place it was found.
// Used as an aggregate: members are initialized in declaration order
// (type, content, metadata).
struct Token {
// Category of the token (for example Ident, LiteralInt, Symbol or Eof).
Type type;
// Text of the token as collected by the tokenizer; empty for Eof.
std::string content;
// Source position and file name recorded for this token.
Metadata metadata;
// Returns a printable description of the token; defined out of line.
std::string formatted();
};
@ -44,7 +56,7 @@ namespace token {
std::ostream& operator<<(std::ostream& stream, Token& token);
std::vector<Token> tokenize(std::string_view text);
std::vector<Token> tokenize(std::string_view text, std::string filename);
}
#endif