167 lines
4.1 KiB
Rust
167 lines
4.1 KiB
Rust
use std::{fmt::Debug, iter::Peekable, str::Chars};
|
|
|
|
/// Characters that `tokenize` accepts as the digits of a decimal literal.
static DECIMAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
|
|
|
|
pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, String> {
|
|
let to_tokenize = to_tokenize.into();
|
|
let mut position = (0, 1);
|
|
let mut cursor = Cursor {
|
|
char_stream: to_tokenize.chars().peekable(),
|
|
position,
|
|
};
|
|
|
|
let mut tokens = Vec::new();
|
|
|
|
while let Some(character) = &cursor.next() {
|
|
position.0 += 1;
|
|
if *character == '\n' {
|
|
position.1 += 1;
|
|
position.0 = 0;
|
|
}
|
|
|
|
let peek = cursor.peek();
|
|
|
|
let variant = match character {
|
|
// Whitespace
|
|
w if w.is_whitespace() => continue,
|
|
// Comments
|
|
'/' if peek == Some(&'/') => {
|
|
while !matches!(&cursor.peek(), Some('\n')) {
|
|
cursor.next();
|
|
}
|
|
continue;
|
|
}
|
|
// "words"
|
|
c if c.is_alphabetic() => {
|
|
let mut value = character.to_string();
|
|
while let Some(c) = &cursor.peek() {
|
|
if !c.is_ascii_alphanumeric() {
|
|
break;
|
|
}
|
|
value += &c.to_string();
|
|
cursor.next();
|
|
}
|
|
|
|
// Check for keywords
|
|
let variant = match value.as_str() {
|
|
"let" => Token::LetKeyword,
|
|
"import" => Token::ImportKeyword,
|
|
_ => Token::Identifier(value),
|
|
};
|
|
variant
|
|
}
|
|
// Decimals
|
|
c if DECIMAL_NUMERICS.contains(c) => {
|
|
let mut value = character.to_string();
|
|
while let Some(c) = &cursor.peek() {
|
|
if !DECIMAL_NUMERICS.contains(c) {
|
|
break;
|
|
}
|
|
value += &c.to_string();
|
|
cursor.next();
|
|
}
|
|
Token::DecimalValue(value)
|
|
}
|
|
// Single character tokens
|
|
'=' => Token::Equals,
|
|
';' => Token::Semicolon,
|
|
':' => Token::Colon,
|
|
'+' => Token::Plus,
|
|
'*' => Token::Times,
|
|
'(' => Token::ParenOpen,
|
|
')' => Token::ParenClose,
|
|
',' => Token::Comma,
|
|
// Invalid token
|
|
_ => Err(format!(
|
|
"Unknown token '{}' at {}, {}",
|
|
character, position.0, position.1
|
|
))?,
|
|
};
|
|
|
|
tokens.push(FullToken {
|
|
token: variant,
|
|
position,
|
|
});
|
|
}
|
|
|
|
position.0 += 1;
|
|
|
|
tokens.push(FullToken {
|
|
token: Token::Eof,
|
|
position,
|
|
});
|
|
|
|
Ok(tokens)
|
|
}
|
|
|
|
/// Lexical tokens produced by `tokenize`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Token {
    // Values
    /// A word that is not one of the keywords.
    Identifier(String),
    /// Numeric literal kept as its source text.
    ///
    /// NOTE(review): the lexer in this file only ever collects the digits
    /// `0`-`9` into this value; it never consumes a decimal point.
    DecimalValue(String),

    // Keywords
    /// The word `let`.
    LetKeyword,
    /// The word `import`.
    ImportKeyword,

    // Symbols
    /// `;`
    Semicolon,
    /// `=`
    Equals,
    /// `:`
    Colon,
    /// `+`
    Plus,
    /// `*`
    Times,
    /// `(`
    ParenOpen,
    /// `)`
    ParenClose,
    /// `,`
    Comma,

    /// End-of-input marker; `tokenize` appends it as the final token.
    Eof,
}

impl Token {
    /// Returns the precedence value of this token: `10` for [`Token::Plus`],
    /// `20` for [`Token::Times`] and `-1` for every other token.
    ///
    /// Presumably consumed by an operator-precedence parser, with `-1`
    /// meaning "not an operator" — the caller is not visible here.
    pub fn get_token_prec(&self) -> i8 {
        match self {
            Token::Times => 20,
            Token::Plus => 10,
            Token::Identifier(_)
            | Token::DecimalValue(_)
            | Token::LetKeyword
            | Token::ImportKeyword
            | Token::Semicolon
            | Token::Equals
            | Token::Colon
            | Token::ParenOpen
            | Token::ParenClose
            | Token::Comma
            | Token::Eof => -1,
        }
    }
}
|
|
|
|
pub struct FullToken {
|
|
pub token: Token,
|
|
position: Position,
|
|
}
|
|
|
|
impl Debug for FullToken {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
f.write_fmt(format_args!(
|
|
"{:?} (Ln {}, Col {})",
|
|
self.token, self.position.1, self.position.0
|
|
))
|
|
}
|
|
}
|
|
|
|
/// Source position stored as `(column, line)`: `.0` is what `FullToken`'s
/// `Debug` output prints as `Col` and `.1` what it prints as `Ln`.
pub type Position = (u32, u32);
|
|
|
|
pub struct Cursor<'a> {
|
|
pub position: Position,
|
|
char_stream: Peekable<Chars<'a>>,
|
|
}
|
|
|
|
impl<'a> Cursor<'a> {
|
|
fn next(&mut self) -> Option<char> {
|
|
let next = self.char_stream.next();
|
|
self.position.0 += 1;
|
|
if let Some('\n') = next {
|
|
self.position.1 += 1;
|
|
self.position.0 = 0;
|
|
}
|
|
next
|
|
}
|
|
|
|
fn peek(&mut self) -> Option<&char> {
|
|
self.char_stream.peek()
|
|
}
|
|
}
|