Add simple tokenizer for easiest.reid

Sofia 2023-07-27 17:40:12 +03:00
commit cca69976dd
9 changed files with 196 additions and 0 deletions

.gitignore vendored Normal file (+1)

@@ -0,0 +1 @@
/target

Cargo.lock generated Normal file (+7)

@@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3

[[package]]
name = "reid"
version = "0.1.0"

Cargo.toml Normal file (+8)

@@ -0,0 +1,8 @@
[package]
name = "reid"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]

easiest.reid Normal file (+4)

@@ -0,0 +1,4 @@
// Hello, comment here!
let hello = 32;
let beep =
    hello ;

easy.reid Normal file (+9)

@@ -0,0 +1,9 @@
// Arithmetic, function calls and imports!

import std::print;

let arithmetic = 3 + 2 * 5 + 1 * 2;
let multiplier = 5 * 2;
let result = arithmetic * multiplier + arithmetic;

print(result);

hard.reid Normal file (+8)

@@ -0,0 +1,8 @@
// New types, type-casting

import std::print;

let text: string = "hello there!";
let value: i16 = 123;

print(text + (value as string));

medium.reid Normal file (+12)

@@ -0,0 +1,12 @@
// if-statements, functions

import std::print;

fn fibonacci(value: i32) -> i32 {
    if value < 3 {
        return 1;
    }
    return fibonacci(value - 1) + fibonacci(value - 2);
}

print(fibonacci(15));

src/lexer.rs Normal file (+138)

@@ -0,0 +1,138 @@
use std::{fmt::Debug, iter::Peekable, str::Chars};

pub static EASIEST: &str = include_str!("../easiest.reid");
// pub static EASY: &str = include_str!("../easy.reid");
// pub static MEDIUM: &str = include_str!("../medium.reid");
// pub static HARD: &str = include_str!("../hard.reid");

static DECIMAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<Token>, String> {
    let to_tokenize = to_tokenize.into();
    let mut cursor = Cursor {
        char_stream: to_tokenize.chars().peekable(),
        position: (0, 1),
    };

    let mut tokens = Vec::new();

    while let Some(character) = &cursor.consume() {
        // Cursor::consume keeps (column, line) up to date, so the position of
        // a token is simply the cursor position at its first character. A
        // separate counter would drift, because the inner consume() calls
        // below never update it.
        let position = cursor.position;
        let peek = cursor.peek();

        let variant = match character {
            // Whitespace
            w if w.is_whitespace() => continue,
            // Comments
            '/' if peek == Some(&'/') => {
                // Skip to the end of the line, or the end of the input, so
                // that a trailing comment without a newline cannot loop forever.
                while !matches!(cursor.peek(), Some('\n') | None) {
                    cursor.consume();
                }
                continue;
            }
            // "words"
            c if c.is_alphabetic() => {
                let mut value = character.to_string();
                while let Some(c) = &cursor.peek() {
                    if !c.is_ascii_alphanumeric() {
                        break;
                    }
                    value += &c.to_string();
                    cursor.consume();
                }

                // Check for keywords
                match value.as_str() {
                    "let" => TokenVariant::LetKeyword,
                    _ => TokenVariant::Identifier(value),
                }
            }
            // Decimals
            c if DECIMAL_NUMERICS.contains(c) => {
                let mut value = character.to_string();
                while let Some(c) = &cursor.peek() {
                    if !DECIMAL_NUMERICS.contains(c) {
                        break;
                    }
                    value += &c.to_string();
                    cursor.consume();
                }
                TokenVariant::DecimalValue(value)
            }
            // Single character tokens
            '=' => TokenVariant::Equals,
            ';' => TokenVariant::Semicolon,
            // Invalid token
            _ => Err(format!(
                "Unknown token '{}' at {}, {}",
                character, position.0, position.1
            ))?,
        };

        tokens.push(Token { variant, position });
    }

    // Mark the end of the input one column past the last character.
    let mut position = cursor.position;
    position.0 += 1;
    tokens.push(Token {
        variant: TokenVariant::Eof,
        position,
    });

    Ok(tokens)
}
pub struct Token {
    variant: TokenVariant,
    position: Position,
}

impl Debug for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!(
            "{:?} (Ln {}, Col {})",
            self.variant, self.position.1, self.position.0
        ))
    }
}

pub type Position = (u32, u32);

#[derive(Debug)]
pub enum TokenVariant {
    LetKeyword,
    Semicolon,
    Equals,
    Identifier(String),
    /// Number with at most one decimal point
    DecimalValue(String),
    Eof,
}

pub struct Cursor<'a> {
    pub position: Position,
    char_stream: Peekable<Chars<'a>>,
}
impl<'a> Cursor<'a> {
    fn consume(&mut self) -> Option<char> {
        let next = self.char_stream.next();

        // Only advance the position when a character was actually consumed;
        // otherwise repeated calls at the end of input would keep moving the
        // column even though the stream is exhausted.
        if next.is_some() {
            self.position.0 += 1;
        }
        if let Some('\n') = next {
            self.position.1 += 1;
            self.position.0 = 0;
        }

        next
    }

    fn peek(&mut self) -> Option<&char> {
        self.char_stream.peek()
    }
}
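
A quick way to sanity-check the tokenizer is a unit test at the bottom of src/lexer.rs. The `tests` module below is only a sketch and not part of this commit; it assumes `let hello = 32;` lexes to exactly six tokens, the trailing Eof included:

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tokenizes_a_let_statement() {
        // Expected stream: let, hello, =, 32, ; and the Eof marker.
        let tokens = tokenize("let hello = 32;").unwrap();
        assert_eq!(tokens.len(), 6);
        assert!(matches!(&tokens[0].variant, TokenVariant::LetKeyword));
        assert!(matches!(&tokens[1].variant, TokenVariant::Identifier(name) if name == "hello"));
        assert!(matches!(&tokens[2].variant, TokenVariant::Equals));
        assert!(matches!(&tokens[3].variant, TokenVariant::DecimalValue(v) if v == "32"));
        assert!(matches!(&tokens[4].variant, TokenVariant::Semicolon));
        assert!(matches!(&tokens[5].variant, TokenVariant::Eof));
    }
}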

src/main.rs Normal file (+9)

@@ -0,0 +1,9 @@
use crate::lexer::EASIEST;

mod lexer;

fn main() {
    let token_stream = lexer::tokenize(EASIEST).unwrap();

    dbg!(&token_stream);
}
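
Running `cargo run` lexes easiest.reid and dumps the stream through `dbg!`. Going by the sources above, the variants should come out roughly as follows (positions omitted here), with the comment line skipped entirely:

LetKeyword, Identifier("hello"), Equals, DecimalValue("32"), Semicolon,
LetKeyword, Identifier("beep"), Equals, Identifier("hello"), Semicolon, Eof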