c-compiler/src/parsing.cpp
2026-04-16 19:43:36 +03:00

651 lines
28 KiB
C++

#include "types.h"
#include "parsing.h"
#include "tokens.h"
namespace parsing {
namespace {
static uint32_t struct_id_counter = 0;
Result<std::unique_ptr<AST::Expression>, std::string> parse_expression(token::TokenStream& stream, Scope& scope);
Result<std::shared_ptr<types::Type>, std::string> parse_type(token::TokenStream& stream, Scope& scope) {
token::TokenStream inner{ stream };
try {
bool is_const = false;
if (inner.peek().type == token::Type::Ident && inner.peek().content == "const") {
is_const = true;
}
auto token = inner.expect(token::Type::Ident);
std::shared_ptr<types::Type> returned{};
if (token.content == "struct") {
std::optional<std::string> struct_name{};
if (inner.peek().type == token::Type::Ident) {
struct_name = inner.expect(token::Type::Ident).content;
}
std::optional<std::vector<types::StructField>> maybe_fields{};
if (inner.peek().content == "{") {
std::vector<types::StructField> fields{};
inner.expect(token::Type::Symbol, "{");
int counter = 0;
while (inner.peek().content != "}") {
if (counter++ > 0)
inner.expect(token::Type::Symbol, ";");
auto ty = parse_type(inner, scope);
if (!ty.ok())
break;
auto field_name = inner.expect(token::Type::Ident);
fields.push_back(types::StructField{ field_name.content, ty.unwrap() });
}
inner.expect(token::Type::Symbol, "}");
maybe_fields = fields;
}
if (!struct_name && !maybe_fields) {
throw std::runtime_error("Struct must have a name or fields!");
}
if (struct_name && !maybe_fields && scope.structs.find(*struct_name) != scope.structs.end()) {
auto original_ty = scope.structs[*struct_name];
auto original_struct_ty = dynamic_cast<types::StructType*>(original_ty.get());
auto ty = new types::StructType{
is_const,
struct_name,
original_struct_ty->m_fields,
true,
false,
original_struct_ty->m_id
};
returned = std::shared_ptr<types::Type>{ ty };
}
else {
if (scope.structs.find(*struct_name) != scope.structs.end()) {
auto original_ty = scope.structs[*struct_name];
auto original_struct_ty = dynamic_cast<types::StructType*>(original_ty.get());
if (!original_struct_ty->m_fields.has_value()) {
auto ty = new types::StructType{
is_const,
struct_name,
maybe_fields,
false,
true,
original_struct_ty->m_id
};
returned = std::shared_ptr<types::Type>{ ty };
}
else {
auto ty = new types::StructType{
is_const,
struct_name,
maybe_fields,
false,
false,
struct_id_counter++
};
returned = std::shared_ptr<types::Type>{ ty };
}
}
else {
auto ty = new types::StructType{
is_const,
struct_name,
maybe_fields,
false,
false,
struct_id_counter++
};
returned = std::shared_ptr<types::Type>{ ty };
}
}
}
else {
// TODO eventually make this be potentially more than one word
std::string type_name = token.content;
if (type_name == "int") {
auto ty = new types::FundamentalType{ is_const, types::FundamentalTypeKind::Int };
returned = std::shared_ptr<types::Type>{ ty };
}
else if (type_name == "char") {
auto ty = new types::FundamentalType{ is_const, types::FundamentalTypeKind::Char };
returned = std::shared_ptr<types::Type>{ ty };
}
else if (type_name == "void") {
auto ty = new types::FundamentalType{ is_const, types::FundamentalTypeKind::Void };
returned = std::shared_ptr<types::Type>{ ty };
}
else {
throw std::runtime_error("Expected type name, got " + type_name);
}
}
while (inner.peek().type == token::Type::Symbol && inner.peek().content == "*") {
inner.next();
auto ptr_const = false;
if (inner.peek().type == token::Type::Ident && inner.peek().content == "const") {
inner.next();
ptr_const = true;
}
auto ty = new types::PointerType{ ptr_const, std::move(returned) };
returned = std::shared_ptr<types::Type>{ ty };
}
stream.m_position = inner.m_position;
return returned;
}
catch (std::runtime_error& error) {
return std::string{ error.what() };
}
}
Result<std::optional<uint32_t>, std::string> parse_array_postfix(token::TokenStream& stream, bool allow_empty, Scope&) {
token::TokenStream inner{ stream };
try {
std::optional<uint32_t> returned{};
inner.expect(token::Type::Symbol, "[");
if (inner.peek().type == token::Type::LiteralInt) {
returned = std::stoi(inner.next().content);
}
if (!allow_empty && !returned.has_value()) {
throw std::runtime_error("Expected array size");
}
inner.expect(token::Type::Symbol, "]");
stream.m_position = inner.m_position;
return returned;
}
catch (std::runtime_error& error) {
return std::string{ error.what() };
}
}
Result<std::unique_ptr<AST::Expression>, std::string> parse_list_initializer(token::TokenStream& stream, Scope& scope) {
token::TokenStream inner{ stream };
try {
auto before_meta = inner.metadata();
inner.expect(token::Type::Symbol, "{");
std::vector<std::unique_ptr<AST::Expression>> expressions{};
int counter = 0;
while (inner.peek().content != "}") {
if (counter++ > 0) {
inner.expect(token::Type::Symbol, ",");
}
expressions.push_back(parse_expression(inner, scope).unwrap());
}
inner.expect(token::Type::Symbol, "}");
stream.m_position = inner.m_position;
return std::unique_ptr<AST::Expression> {
new AST::ListInitializerExpression(
before_meta + inner.metadata(),
std::move(expressions),
std::shared_ptr<types::Type> {
new types::FundamentalType{ true, types::FundamentalTypeKind::Any }
})
};
}
catch (std::runtime_error& error) {
return std::string{ error.what() };
}
}
Result<std::unique_ptr<AST::Expression>, std::string> parse_plain_expression(token::TokenStream& stream, Scope& scope) {
token::TokenStream inner{ stream };
try {
if (auto list_init = parse_list_initializer(inner, scope); list_init.ok()) {
stream.m_position = inner.m_position;
return std::unique_ptr<AST::Expression> { list_init.unwrap() };
}
auto token = inner.next();
if (token.type == token::Type::LiteralInt) {
stream.m_position = inner.m_position;
auto expr = new AST::IntLiteralExpression{ token.metadata, std::stoi(token.content) };
return std::unique_ptr<AST::Expression>{ expr };
}
else if (token.type == token::Type::LiteralStr) {
stream.m_position = inner.m_position;
auto expr = new AST::StringLiteralExpression{ token.metadata, token.content };
return std::unique_ptr<AST::Expression>{ expr };
}
else if (token.type == token::Type::Ident) {
stream.m_position = inner.m_position;
auto expr = new AST::ValueReferenceExpression{ token.metadata, token.content };
return std::unique_ptr<AST::Expression>{ expr };
}
else {
throw std::runtime_error("Expected expression");
}
}
catch (std::runtime_error& error) {
return std::string{ error.what() };
}
}
Result<std::shared_ptr<types::Type>, std::string> parse_cast(token::TokenStream& stream, Scope& scope) {
token::TokenStream inner{ stream };
try {
inner.expect(token::Type::Symbol, "(");
auto ty = parse_type(inner, scope).unwrap();
inner.expect(token::Type::Symbol, ")");
stream.m_position = inner.m_position;
return ty;
}
catch (std::runtime_error& error) {
return std::string{ error.what() };
}
}
Result<std::unique_ptr<AST::Expression>, std::string> parse_primary_expression(token::TokenStream& stream, Scope& scope) {
token::TokenStream inner{ stream };
try {
auto before_meta = inner.metadata();
if (auto cast = parse_cast(inner, scope); cast.ok()) {
auto expr = parse_primary_expression(inner, scope).unwrap();
stream.m_position = inner.m_position;
return std::unique_ptr<AST::Expression>{
new AST::CastExpression{
before_meta + inner.metadata(),
cast.unwrap(),
std::move(expr)
}
};
}
else if (inner.peek().content == "(") {
inner.next();
auto expr = parse_expression(inner, scope).unwrap();
inner.expect(token::Type::Symbol, ")");
stream.m_position = inner.m_position;
return expr;
}
else if (inner.peek().content == "&") {
inner.next();
auto expr = parse_primary_expression(inner, scope).unwrap();
stream.m_position = inner.m_position;
return std::unique_ptr<AST::Expression> {
new AST::RefExpression(before_meta + inner.metadata(), std::move(expr))
};
}
else if (inner.peek().content == "*") {
inner.next();
auto expr = parse_primary_expression(inner, scope).unwrap();
stream.m_position = inner.m_position;
return std::unique_ptr<AST::Expression> {
new AST::DerefExpression(before_meta + inner.metadata(), std::move(expr))
};
}
auto plain_expr = parse_plain_expression(inner, scope);
while (inner.peek().content == "(" || inner.peek().content == "[" || inner.peek().content == ".") {
if (inner.peek().content == "(") {
inner.next();
std::vector<std::unique_ptr<AST::Expression>> args{};
int counter = 0;
while (inner.peek().content != ")") {
if (counter++ > 0)
inner.expect(token::Type::Symbol, ",");
args.push_back(parse_expression(inner, scope).unwrap());
}
inner.expect(token::Type::Symbol, ")");
auto fn_call = new AST::FunctionCallExpression{ before_meta + inner.metadata(), plain_expr.unwrap(), std::move(args) };
plain_expr = std::unique_ptr<AST::Expression>{ fn_call };
}
else if (auto postfix = parse_array_postfix(inner, false, scope); postfix.ok()) {
auto idx_expr = new AST::IndexAccessExpression{
before_meta + inner.metadata(), plain_expr.unwrap(), *postfix.unwrap() };
plain_expr = std::unique_ptr<AST::Expression>{ idx_expr };
}
else if (inner.peek().content == ".") {
inner.next();
std::string field_name = inner.expect(token::Type::Ident).content;
plain_expr = std::unique_ptr<AST::Expression>{
new AST::FieldAccessExpression{before_meta + inner.metadata(), plain_expr.unwrap(), field_name}
};
}
}
stream.m_position = inner.m_position;
return plain_expr;
}
catch (std::runtime_error& error) {
return std::string{ error.what() };
}
}
Result<types::BinOp, std::string> parse_binop(token::TokenStream& stream, Scope&) {
token::TokenStream inner{ stream };
try {
auto token = inner.next();
if (token.type != token::Type::Symbol) {
throw std::runtime_error("Expected binop");
}
else if (token.content == "=") {
stream.m_position = inner.m_position;
return types::BinOp{ types::BinOp::Assignment };
}
else if (token.content == "+") {
stream.m_position = inner.m_position;
return types::BinOp{ types::BinOp::Add };
}
else if (token.content == "-") {
stream.m_position = inner.m_position;
return types::BinOp{ types::BinOp::Sub };
}
else if (token.content == "<") {
stream.m_position = inner.m_position;
return types::BinOp{ types::BinOp::LessThan };
}
else if (token.content == ">") {
stream.m_position = inner.m_position;
return types::BinOp{ types::BinOp::GreaterThan };
}
throw std::runtime_error("Expected binop");
}
catch (std::runtime_error& error) {
return std::string{ error.what() };
}
}
std::unique_ptr<AST::Expression> parse_rhs(
token::TokenStream& stream, std::unique_ptr<AST::Expression> lhs, int prev_precedence, Scope& scope) {
auto before = stream.metadata();
auto binop_res = parse_binop(stream, scope);
while (binop_res.ok()) {
auto binop = binop_res.unwrap();
auto rhs = parse_primary_expression(stream, scope).unwrap();
if (types::operator_precedence(binop) > prev_precedence) {
rhs = parse_rhs(stream, std::move(rhs), types::operator_precedence(binop), scope);
}
auto binop_expr = new AST::BinaryOperationExpression{ before + stream.metadata(), std::move(lhs), binop, std::move(rhs) };
lhs = std::unique_ptr<AST::Expression>{ binop_expr };
binop_res = parse_binop(stream, scope);
}
return lhs;
}
Result<std::unique_ptr<AST::Expression>, std::string> parse_expression(token::TokenStream& stream, Scope& scope) {
try {
auto lhs = parse_primary_expression(stream, scope).unwrap();
return std::unique_ptr{ parse_rhs(stream, std::move(lhs), 0, scope) };
}
catch (std::runtime_error& error) {
return std::string{ error.what() };
}
}
Result<std::unique_ptr<AST::InitializationStatement>, std::string> parse_init_statement(token::TokenStream& stream, Scope& scope) {
token::TokenStream inner{ stream };
auto before_meta = inner.metadata();
try {
auto ty = parse_type(inner, scope).unwrap();
auto name = inner.expect(token::Type::Ident);
auto array_postfix = parse_array_postfix(inner, false, scope);
while (array_postfix.ok()) {
auto postfix = array_postfix.unwrap();
ty = std::shared_ptr<types::Type>{
new types::ArrayType(ty, *postfix, false)
};
array_postfix = parse_array_postfix(inner, false, scope);
}
std::optional<std::unique_ptr<AST::Expression>> expr = {};
if (inner.peek().type == token::Type::Symbol && inner.peek().content == "=") {
inner.expect(token::Type::Symbol, "=");
expr = parse_expression(inner, scope).unwrap();
}
inner.expect(token::Type::Symbol, ";");
stream.m_position = inner.m_position;
auto init = new AST::InitializationStatement{ before_meta + inner.metadata(), std::move(ty), name.content, std::move(expr) };
return std::unique_ptr<AST::InitializationStatement>{ init };
}
catch (std::runtime_error& error) {
return std::string{ error.what() };
}
}
Result<std::unique_ptr<AST::Statement>, std::string> parse_statement(token::TokenStream& stream, Scope& scope) {
token::TokenStream inner{ stream };
auto before_meta = inner.metadata();
try {
if (inner.peek().type == token::Type::ReturnKeyword) {
inner.next();
auto expression = parse_expression(inner, scope).unwrap();
inner.expect(token::Type::Symbol, ";");
stream.m_position = inner.m_position;
auto ret = new AST::ReturnStatement{ before_meta + stream.metadata(),std::move(expression) };
return std::unique_ptr<AST::Statement>{ ret };
}
else if (inner.peek().type == token::Type::IfKeyword) {
inner.next();
inner.expect(token::Type::Symbol, "(");
auto expression = parse_expression(inner, scope).unwrap();
inner.expect(token::Type::Symbol, ")");
auto then_statement = parse_statement(inner, scope).unwrap();
std::optional<std::unique_ptr<AST::Statement>> else_statement{};
if (inner.peek().type == token::Type::ElseKeyword) {
inner.next();
else_statement = parse_statement(inner, scope).unwrap();
}
stream.m_position = inner.m_position;
auto statement = new AST::IfStatement{
before_meta + stream.metadata(),
std::move(expression),
std::move(then_statement),
std::move(else_statement)
};
return std::unique_ptr<AST::Statement>{ statement };
}
else if (auto init = parse_init_statement(inner, scope); init.ok()) {
stream.m_position = inner.m_position;
return std::unique_ptr<AST::Statement>{ init.unwrap() };
}
else if (auto expr = parse_expression(inner, scope); expr.ok()) {
stream.m_position = inner.m_position;
stream.expect(token::Type::Symbol, ";");
auto expr_statement = new AST::ExpressionStatement{ before_meta + stream.metadata(), expr.unwrap() };
return std::unique_ptr<AST::Statement>{ expr_statement };
}
else {
throw std::runtime_error("Expected return-keyword");
}
}
catch (std::runtime_error& error) {
return std::string{ error.what() };
}
}
}
Result<std::unique_ptr<AST::TopLevelStatement>, std::string> parse_function(token::TokenStream& stream, Scope& scope) {
token::TokenStream inner{ stream };
auto before_meta = inner.metadata();
try {
auto type = parse_type(inner, scope).unwrap();
auto name_token = inner.expect(token::Type::Ident);
inner.expect(token::Type::Symbol, "(");
std::vector<std::pair<std::optional<std::string>, std::shared_ptr<types::Type>>> params;
bool is_vararg = false;
while (inner.peek().content != ")") {
if (params.size() > 0) {
inner.expect(token::Type::Symbol, ",");
}
if (inner.peek().content == ".") {
inner.next();
inner.expect(token::Type::Symbol, ".");
inner.expect(token::Type::Symbol, ".");
is_vararg = true;
break;
}
auto param_ty = parse_type(inner, scope).unwrap();
std::optional<std::string> param_name{};
if (inner.peek().type == token::Type::Ident) {
param_name = inner.expect(token::Type::Ident).content;
auto postfix = parse_array_postfix(inner, true, scope);
while (postfix.ok()) {
auto array_postfix = postfix.unwrap();
if (array_postfix) {
param_ty = std::shared_ptr<types::Type>{
new types::ArrayType(param_ty, *array_postfix, false)
};
}
else {
param_ty = std::shared_ptr<types::Type>{
new types::PointerType(true, param_ty)
};
}
postfix = parse_array_postfix(inner, true, scope);
}
}
params.push_back(std::pair(param_name, std::move(param_ty)));
}
inner.expect(token::Type::Symbol, ")");
auto inner_scope = parsing::Scope{ scope };
std::optional<std::vector<std::unique_ptr<AST::Statement>>> statements{};
if (inner.peek().content == "{") {
inner.expect(token::Type::Symbol, "{");
std::vector<std::unique_ptr<AST::Statement>> statement_list{};
auto statement = parse_statement(inner, inner_scope);
while (statement.ok()) {
statement_list.push_back(statement.unwrap());
statement = parse_statement(inner, inner_scope);
}
statements = std::optional{ std::move(statement_list) };
inner.expect(token::Type::Symbol, "}");
}
else {
inner.expect(token::Type::Symbol, ";");
}
stream.m_position = inner.m_position;
auto fun = new AST::Function{
before_meta + stream.metadata(),
std::move(type),
std::move(params),
is_vararg,
name_token.content,
std::move(statements)
};
return std::unique_ptr<AST::TopLevelStatement>{ fun };
}
catch (std::runtime_error& error) {
return std::string(error.what());
}
}
Result<std::unique_ptr<AST::TopLevelStatement>, std::string> parse_tl_typedef(token::TokenStream& stream, Scope& scope) {
token::TokenStream inner{ stream };
auto before_meta = inner.metadata();
try {
auto ty = parse_type(inner, scope).unwrap();
inner.expect(token::Type::Symbol, ";");
if (ty->m_kind == types::TypeKind::Struct) {
auto struct_ty = dynamic_cast<types::StructType*>(ty.get());
if (!struct_ty->m_is_ref && struct_ty->m_name) {
if (scope.structs.find(*struct_ty->m_name) != scope.structs.end() && struct_ty->m_is_def) {
auto true_ty = dynamic_cast<types::StructType*>(scope.structs[*struct_ty->m_name].get());
true_ty->m_fields = struct_ty->m_fields;
}
else {
scope.structs[*struct_ty->m_name] = ty;
}
}
}
stream.m_position = inner.m_position;
auto tl_typedef = new AST::TopLevelTypedef{
before_meta + stream.metadata(), ty
};
return std::unique_ptr<AST::TopLevelStatement>{tl_typedef};
}
catch (std::runtime_error& error) {
return std::string(error.what());
}
}
Result<std::unique_ptr<AST::TopLevelStatement>, std::string> parse_top_level_statement(token::TokenStream& stream, Scope& scope) {
token::TokenStream inner{ stream };
auto before_meta = inner.metadata();
try {
if (auto func = parse_function(inner, scope); func.ok()) {
stream.m_position = inner.m_position;
return func.unwrap();
}
else if (auto tl_typedef = parse_tl_typedef(inner, scope); tl_typedef.ok()) {
stream.m_position = inner.m_position;
return tl_typedef.unwrap();
}
else {
throw std::runtime_error("Expected top-level statement, got " + inner.peek().formatted());
}
}
catch (std::runtime_error& error) {
return std::string(error.what());
}
}
}