From 9710d17e00423f3fa59bf3d5d6ccbdc093b4627e Mon Sep 17 00:00:00 2001 From: sofia Date: Wed, 9 Jul 2025 20:01:24 +0300 Subject: [PATCH] Add a bunch of documentation --- reid-llvm-lib/src/builder.rs | 8 +++--- reid-llvm-lib/src/compile.rs | 3 +++ reid-llvm-lib/src/debug.rs | 1 + reid-llvm-lib/src/lib.rs | 3 +++ reid-llvm-lib/src/util.rs | 4 +++ reid/src/codegen.rs | 32 +++++++++++++++--------- reid/src/lexer.rs | 5 +++- reid/src/lib.rs | 23 ++++++++--------- reid/src/mir/display.rs | 2 +- reid/src/mir/mod.rs | 3 +-- reid/src/mir/pass.rs | 2 ++ reid/src/mir/typecheck.rs | 45 +++++++++++++++++++++++++-------- reid/src/mir/types.rs | 9 ------- reid/src/pad_adapter.rs | 48 +++++++++++++++++------------------- reid/src/token_stream.rs | 7 ++++++ reid/src/util.rs | 3 +++ 16 files changed, 121 insertions(+), 77 deletions(-) diff --git a/reid-llvm-lib/src/builder.rs b/reid-llvm-lib/src/builder.rs index 9c99b7a..a993d49 100644 --- a/reid-llvm-lib/src/builder.rs +++ b/reid-llvm-lib/src/builder.rs @@ -1,3 +1,5 @@ +/// This module contains simply [`Builder`] and its related utility Values. +/// Builder is the actual struct being modified when building the LLIR. 
use std::{cell::RefCell, rc::Rc}; use crate::{ @@ -45,7 +47,7 @@ pub struct InstructionHolder { } #[derive(Clone)] -pub struct Builder { +pub(crate) struct Builder { modules: Rc>>, } @@ -237,7 +239,7 @@ impl Builder { } impl InstructionValue { - pub fn get_type(&self, builder: &Builder) -> Result { + pub(crate) fn get_type(&self, builder: &Builder) -> Result { use InstructionKind::*; unsafe { match &builder.instr_data(self).kind { @@ -316,7 +318,7 @@ impl Type { } impl TerminatorKind { - pub fn get_type(&self, builder: &Builder) -> Result { + pub(crate) fn get_type(&self, builder: &Builder) -> Result { use TerminatorKind::*; match self { Ret(instr_val) => instr_val.get_type(builder), diff --git a/reid-llvm-lib/src/compile.rs b/reid-llvm-lib/src/compile.rs index 6058913..768f28e 100644 --- a/reid-llvm-lib/src/compile.rs +++ b/reid-llvm-lib/src/compile.rs @@ -1,3 +1,6 @@ +/// This module contains all of the relevant code that is needed to compile the +/// LLIR ([`Context`]) into LLVM IR. This module is the only one that interfaces +/// with the LLVM API. use std::{collections::HashMap, ffi::CString, ptr::null_mut}; use llvm_sys::{ diff --git a/reid-llvm-lib/src/debug.rs b/reid-llvm-lib/src/debug.rs index 9eb1aa2..1eeb0dc 100644 --- a/reid-llvm-lib/src/debug.rs +++ b/reid-llvm-lib/src/debug.rs @@ -1,3 +1,4 @@ +/// Debug implementations for relevant types use std::fmt::{Debug, Write}; use crate::{CmpPredicate, InstructionData, InstructionKind, TerminatorKind, builder::*}; diff --git a/reid-llvm-lib/src/lib.rs b/reid-llvm-lib/src/lib.rs index d07f4f3..993d93a 100644 --- a/reid-llvm-lib/src/lib.rs +++ b/reid-llvm-lib/src/lib.rs @@ -1,3 +1,6 @@ +/// Reid LLVM Lib is an ergonomic Rust'y API which is used to produce a +/// Low-Level IR (LLIR) using [`Context`] and [`Builder`]. This Builder can then +/// be used at the end to compile said LLIR into LLVM IR. 
use std::marker::PhantomData; use builder::{BlockValue, Builder, FunctionValue, InstructionValue, ModuleValue}; diff --git a/reid-llvm-lib/src/util.rs b/reid-llvm-lib/src/util.rs index 574fb11..7319d1b 100644 --- a/reid-llvm-lib/src/util.rs +++ b/reid-llvm-lib/src/util.rs @@ -29,6 +29,8 @@ fn cstring_to_err(value: *mut c_char) -> Result<(), String> { .map_or(Ok(()), |s| Err(s)) } +/// Utility struct for LLVM's Error Messages, which need to be disposed +/// manually. pub struct ErrorMessageHolder(*mut c_char); impl ErrorMessageHolder { @@ -55,6 +57,8 @@ impl Drop for ErrorMessageHolder { } } +/// Make sure types for given instructions match. Return Ok(type) if they do, +/// and error otherwise. pub fn match_types( lhs: &InstructionValue, rhs: &InstructionValue, diff --git a/reid/src/codegen.rs b/reid/src/codegen.rs index 9ee27e4..b1b11d2 100644 --- a/reid/src/codegen.rs +++ b/reid/src/codegen.rs @@ -7,12 +7,16 @@ use reid_lib::{ use crate::mir::{self, types::ReturnType, TypeKind, VariableReference}; +/// Context that contains all of the given modules as complete codegenerated +/// LLIR that can then be finally compiled into LLVM IR. #[derive(Debug)] pub struct CodegenContext<'ctx> { - pub modules: Vec>, + modules: Vec>, } impl<'ctx> CodegenContext<'ctx> { + /// Compile contained LLIR into LLVM IR and produce `hello.o` and + /// `hello.asm` pub fn compile(&self) { for module in &self.modules { module.context.compile(); @@ -21,6 +25,7 @@ impl<'ctx> CodegenContext<'ctx> { } impl mir::Context { + /// Compile MIR [`Context`] into [`CodegenContext`] containing LLIR. 
pub fn codegen<'ctx>(&self, context: &'ctx Context) -> CodegenContext<'ctx> { let mut modules = Vec::new(); for module in &self.modules { @@ -30,9 +35,9 @@ impl mir::Context { } } -pub struct ModuleCodegen<'ctx> { +struct ModuleCodegen<'ctx> { pub context: &'ctx Context, - pub module: Module<'ctx>, + _module: Module<'ctx>, } impl<'ctx> std::fmt::Debug for ModuleCodegen<'ctx> { @@ -93,7 +98,10 @@ impl mir::Module { } } - ModuleCodegen { context, module } + ModuleCodegen { + context, + _module: module, + } } } @@ -107,7 +115,7 @@ pub struct Scope<'ctx, 'a> { } impl<'ctx, 'a> Scope<'ctx, 'a> { - pub fn with_block(&self, block: Block<'ctx>) -> Scope<'ctx, 'a> { + fn with_block(&self, block: Block<'ctx>) -> Scope<'ctx, 'a> { Scope { block, function: self.function, @@ -120,7 +128,7 @@ impl<'ctx, 'a> Scope<'ctx, 'a> { /// Takes the block out from this scope, swaps the given block in it's place /// and returns the old block. - pub fn swap_block(&mut self, block: Block<'ctx>) -> Block<'ctx> { + fn swap_block(&mut self, block: Block<'ctx>) -> Block<'ctx> { let mut old_block = block; mem::swap(&mut self.block, &mut old_block); old_block @@ -128,7 +136,7 @@ impl<'ctx, 'a> Scope<'ctx, 'a> { } impl mir::Statement { - pub fn codegen<'ctx, 'a>(&self, scope: &mut Scope<'ctx, 'a>) -> Option { + fn codegen<'ctx, 'a>(&self, scope: &mut Scope<'ctx, 'a>) -> Option { match &self.0 { mir::StmtKind::Let(VariableReference(_, name, _), expression) => { let value = expression.codegen(scope).unwrap(); @@ -143,7 +151,7 @@ impl mir::Statement { } impl mir::IfExpression { - pub fn codegen<'ctx, 'a>(&self, scope: &mut Scope<'ctx, 'a>) -> Option { + fn codegen<'ctx, 'a>(&self, scope: &mut Scope<'ctx, 'a>) -> Option { let condition = self.0.codegen(scope).unwrap(); // Create blocks @@ -208,7 +216,7 @@ impl mir::IfExpression { } impl mir::Expression { - pub fn codegen<'ctx, 'a>(&self, scope: &mut Scope<'ctx, 'a>) -> Option { + fn codegen<'ctx, 'a>(&self, scope: &mut Scope<'ctx, 'a>) -> Option { match 
&self.0 { mir::ExprKind::Variable(varref) => { varref.0.is_known().expect("variable type unknown"); @@ -304,7 +312,7 @@ impl mir::CmpOperator { } impl mir::Block { - pub fn codegen<'ctx, 'a>(&self, mut scope: &mut Scope<'ctx, 'a>) -> Option { + fn codegen<'ctx, 'a>(&self, mut scope: &mut Scope<'ctx, 'a>) -> Option { for stmt in &self.statements { stmt.codegen(&mut scope); } @@ -325,11 +333,11 @@ impl mir::Block { } impl mir::Literal { - pub fn as_const(&self, block: &mut Block) -> InstructionValue { + fn as_const(&self, block: &mut Block) -> InstructionValue { block.build(self.as_const_kind()).unwrap() } - pub fn as_const_kind(&self) -> InstructionKind { + fn as_const_kind(&self) -> InstructionKind { InstructionKind::Constant(match *self { mir::Literal::I8(val) => ConstValue::I8(val), mir::Literal::I16(val) => ConstValue::I16(val), diff --git a/reid/src/lexer.rs b/reid/src/lexer.rs index 15c5f18..e1daf65 100644 --- a/reid/src/lexer.rs +++ b/reid/src/lexer.rs @@ -81,6 +81,7 @@ impl From for String { } } +/// A token with a position #[derive(Clone)] pub struct FullToken { pub token: Token, @@ -98,7 +99,7 @@ impl Debug for FullToken { pub type Position = (u32, u32); -pub struct Cursor<'a> { +struct Cursor<'a> { pub position: Position, char_stream: Chars<'a>, } @@ -128,6 +129,8 @@ impl<'a> Cursor<'a> { } } +/// Take source text and produce a list of [`FullToken`]s from it, ie. +/// tokenizing it. pub fn tokenize>(to_tokenize: T) -> Result, Error> { let to_tokenize = to_tokenize.into(); let mut cursor = Cursor { diff --git a/reid/src/lib.rs b/reid/src/lib.rs index 2aae4dd..8dd6b3c 100644 --- a/reid/src/lib.rs +++ b/reid/src/lib.rs @@ -11,11 +11,12 @@ mod pad_adapter; mod token_stream; mod util; -// TODO: -// 1. Make it so that TopLevelStatement can only be import or function def -// 2. Make BlockLevelStatement, that has everything TopLevelStatement has now -// 3. 
Make it so all codegen is done with a Block-struct, that represents a -// single proper block +// TODO (Missing Relevant Features): +// - Arrays +// - Structs (and custom types as such) +// - Extern functions +// - Strings +// - Loops #[derive(thiserror::Error, Debug)] pub enum ReidError { @@ -25,10 +26,11 @@ pub enum ReidError { ParserError(#[from] token_stream::Error), #[error("Errors during typecheck: {0:?}")] TypeCheckErrors(Vec>), - // #[error(transparent)] - // CodegenError(#[from] codegen::Error), } +/// Takes in a bit of source code, parses and compiles it and produces `hello.o` +/// and `hello.asm` from it, which can be linked using `ld` to produce an +/// executable file. pub fn compile(source: &str) -> Result { let tokens = lexer::tokenize(source)?; @@ -51,7 +53,7 @@ pub fn compile(source: &str) -> Result { dbg!(&ast_module); let mut mir_context = mir::Context::from(vec![ast_module]); - let state = mir_context.pass(&mut TypeCheck {}); + let state = mir_context.pass(&mut TypeCheck); dbg!(&state); println!("{}", &mir_context); @@ -67,9 +69,4 @@ pub fn compile(source: &str) -> Result { codegen_modules.compile(); Ok(String::new()) - - // Ok(match cogegen_module.module.print_to_string() { - // Ok(v) => v, - // Err(e) => panic!("Err: {:?}", e), - // }) } diff --git a/reid/src/mir/display.rs b/reid/src/mir/display.rs index 815fe67..ea44a57 100644 --- a/reid/src/mir/display.rs +++ b/reid/src/mir/display.rs @@ -1,4 +1,4 @@ -use std::fmt::{write, Debug, Display, Write}; +use std::fmt::{Debug, Display, Write}; use crate::pad_adapter::PadAdapter; diff --git a/reid/src/mir/mod.rs b/reid/src/mir/mod.rs index 3cc6dd8..1951cdd 100644 --- a/reid/src/mir/mod.rs +++ b/reid/src/mir/mod.rs @@ -1,6 +1,5 @@ /// In this module are defined structs that are used for performing passes on -/// Reid. It contains a simplified version of Reid which must already be -/// type-checked beforehand. +/// Reid. It contains a simplified version of Reid which can be e.g. typechecked. 
use crate::token_stream::TokenRange; mod display; diff --git a/reid/src/mir/pass.rs b/reid/src/mir/pass.rs index 7cba8db..c75ade4 100644 --- a/reid/src/mir/pass.rs +++ b/reid/src/mir/pass.rs @@ -1,3 +1,5 @@ +/// This module contains relevant code for [`Pass`] and shared code between +/// passes. Passes can be performed on Reid MIR to e.g. typecheck the code. use std::collections::HashMap; use std::error::Error as STDError; diff --git a/reid/src/mir/typecheck.rs b/reid/src/mir/typecheck.rs index 338e2c2..77c5c01 100644 --- a/reid/src/mir/typecheck.rs +++ b/reid/src/mir/typecheck.rs @@ -1,6 +1,7 @@ +/// This module contains code relevant to doing a type checking pass on the MIR. +/// During typechecking relevant types are also coerced if possible. use std::{convert::Infallible, iter}; -/// This module contains code relevant to doing a type checking pass on the MIR. use crate::{mir::*, util::try_all}; use TypeKind::*; use VagueType::*; @@ -29,9 +30,13 @@ pub enum ErrorKind { FunctionAlreadyDefined(String), #[error("Variable not defined: {0}")] VariableAlreadyDefined(String), + #[error("Function {0} was given {1} parameters, but {2} were expected")] + InvalidAmountParameters(String, usize, usize), } -pub struct TypeCheck {} +/// Struct used to implement a type-checking pass that can be performed on the +/// MIR. +pub struct TypeCheck; impl Pass for TypeCheck { type TError = ErrorKind; @@ -123,6 +128,7 @@ impl Block { } if let Some((return_kind, expr)) = &mut self.return_expression { + // Use function return type as hint if return is hard. 
let ret_hint_t = match return_kind { ReturnKind::Hard => state.scope.return_type_hint, ReturnKind::Soft => hint_t, @@ -193,8 +199,20 @@ impl Expression { .ok_or(ErrorKind::FunctionNotDefined(function_call.name.clone())); if let Ok(f) = true_function { - if function_call.parameters.len() != f.params.len() { - state.ok::<_, Infallible>(Err(ErrorKind::Null), self.1); + let param_len_given = function_call.parameters.len(); + let param_len_expected = f.params.len(); + + // Check that there are the same number of parameters given + // as expected + if param_len_given != param_len_expected { + state.ok::<_, Infallible>( + Err(ErrorKind::InvalidAmountParameters( + function_call.name.clone(), + param_len_given, + param_len_expected, + )), + self.1, + ); } let true_params_iter = f.params.into_iter().chain(iter::repeat(Vague(Unknown))); @@ -202,6 +220,7 @@ impl Expression { for (param, true_param_t) in function_call.parameters.iter_mut().zip(true_params_iter) { + // Typecheck every param separately let param_res = param.typecheck(state, Some(true_param_t)); let param_t = state.or_else(param_res, Vague(Unknown), param.1); state.ok(param_t.collapse_into(&true_param_t), param.1); @@ -223,15 +242,17 @@ impl Expression { let cond_t = state.or_else(cond_res, Vague(Unknown), cond.1); state.ok(cond_t.collapse_into(&Bool), cond.1); - let lhs_res = lhs.typecheck(state, hint_t); - let lhs_type = state.or_else(lhs_res, Vague(Unknown), lhs.meta); - let rhs_type = if let Some(rhs) = rhs { - let res = rhs.typecheck(state, hint_t); - state.or_else(res, Vague(Unknown), rhs.meta) + // Typecheck then/else return types and make sure they are the + // same, if else exists. 
+ let then_res = lhs.typecheck(state, hint_t); + let then_ret_t = state.or_else(then_res, Vague(Unknown), lhs.meta); + let else_ret_t = if let Some(else_block) = rhs { + let res = else_block.typecheck(state, hint_t); + state.or_else(res, Vague(Unknown), else_block.meta) } else { Vague(Unknown) }; - lhs_type.collapse_into(&rhs_type) + then_ret_t.collapse_into(&else_ret_t) } ExprKind::Block(block) => block.typecheck(state, hint_t), } @@ -239,6 +260,8 @@ impl Expression { } impl Literal { + /// Try to coerce this literal, ie. convert it to a more specific type in + /// regards to the given hint if any. fn try_coerce(self, hint: Option) -> Result { if let Some(hint) = hint { use Literal as L; @@ -314,6 +337,8 @@ fn try_collapse(lhs: &TypeKind, rhs: &TypeKind) -> Result { } pub trait Collapsable: Sized + Clone { + /// Try to narrow two types into one singular type. E.g. Vague(Number) and + /// I32 could be narrowed to just I32. fn collapse_into(&self, other: &Self) -> Result; } diff --git a/reid/src/mir/types.rs b/reid/src/mir/types.rs index 6df91ed..253dea1 100644 --- a/reid/src/mir/types.rs +++ b/reid/src/mir/types.rs @@ -63,12 +63,3 @@ impl ReturnType for FunctionCall { Ok(self.return_type.clone()) } } - -// impl ReturnType for FunctionDefinition { -// fn return_type(&self) -> Result { -// match &self.kind { -// FunctionDefinitionKind::Local(block, _) => block.return_type(), -// FunctionDefinitionKind::Extern(type_kind) => Ok(type_kind.clone()), -// } -// } -// } diff --git a/reid/src/pad_adapter.rs b/reid/src/pad_adapter.rs index 08d01a7..2a367ff 100644 --- a/reid/src/pad_adapter.rs +++ b/reid/src/pad_adapter.rs @@ -1,29 +1,25 @@ -// Copyright (c) The Rust Project Contributors -// -// Permission is hereby granted, free of charge, to any -// person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the -// Software without restriction, including without -// limitation the rights to use, copy, modify, merge, -// 
publish, distribute, sublicense, and/or sell copies of -// the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice -// shall be included in all copies or substantial portions -// of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS IN THE SOFTWARE. - +/// Copied from +/// https://github.com/rust-lang/rust/blob/6b3ae3f6e45a33c2d95fa0362c9b2593e567fd34/library/core/src/fmt/builders.rs#L102 +/// +/// Copyright (c) The Rust Project Contributors +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. use std::fmt; pub struct PadAdapter<'buf, 'state> { diff --git a/reid/src/token_stream.rs b/reid/src/token_stream.rs index a529476..617cf1f 100644 --- a/reid/src/token_stream.rs +++ b/reid/src/token_stream.rs @@ -1,8 +1,13 @@ +/// Contains relevant code for parsing tokens received from +/// Lexing/Tokenizing-stage. use crate::{ ast::parse::Parse, lexer::{FullToken, Position, Token}, }; +/// Utility struct that is able to parse [`FullToken`]s while being +/// failure-resistant in that it can backtrack easily, and is able to keep +/// track of parsed Token-ranges easily. pub struct TokenStream<'a, 'b> { ref_position: Option<&'b mut usize>, tokens: &'a [FullToken], @@ -157,6 +162,8 @@ impl Drop for TokenStream<'_, '_> { } } +/// Index-range that can be used with the original array of [`FullToken`]s to +/// retrieve the precise location of a failure. #[derive(Default, Clone, Copy)] pub struct TokenRange { pub start: usize, diff --git a/reid/src/util.rs b/reid/src/util.rs index cf45009..dc84b14 100644 --- a/reid/src/util.rs +++ b/reid/src/util.rs @@ -1,3 +1,6 @@ +/// Take a list of Results, and try to unwrap all of them. Returns a Result +/// which either contains every Result unwrapped, or every error that exists +/// within the array. pub fn try_all(list: Vec>) -> Result, Vec> { let mut successes = Vec::with_capacity(list.len()); let mut failures = Vec::with_capacity(list.len());