Created
November 16, 2024 17:39
-
-
Save CryZe/41a7d5855d1dd8f0cb095893bca31d5e to your computer and use it in GitHub Desktop.
Very simple lexer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// A minimal lexer that consumes a borrowed input string from the front.
struct Lexer<'a> {
    /// The part of the input that has not been consumed yet; the lexing
    /// methods replace it with the remainder as they make progress.
    input: &'a str,
    /// Byte length of the complete original input, kept so that error
    /// positions can be computed as `total_len - input.len()`.
    total_len: usize,
}
impl<'a> Lexer<'a> { | |
fn new(input: &'a str) -> Self { | |
Self { | |
input, | |
total_len: input.len(), | |
} | |
} | |
fn expected<T>(&mut self, expected: &'static str) -> Result<T, Error> { | |
Err(Error { | |
expected, | |
pos: self.total_len - self.input.len(), | |
}) | |
} | |
fn whitespace(&mut self) { | |
self.input = self.input.trim_start(); | |
} | |
fn is_empty(&mut self) -> bool { | |
self.whitespace(); | |
self.input.is_empty() | |
} | |
fn ident(&mut self, expected: &'static str) -> Result<&'a str, Error> { | |
self.whitespace(); | |
self.glued_ident(expected) | |
} | |
fn glued_ident(&mut self, expected: &'static str) -> Result<&'a str, Error> { | |
let rem = self.input.trim_start_matches(|c: char| c.is_alphanumeric()); | |
let pos = self.input.len() - rem.len(); | |
if pos == 0 { | |
return self.expected(expected); | |
} | |
// SAFETY: The position is known to be valid. | |
let ident = unsafe { self.input.get_unchecked(..pos) }; | |
self.input = rem; | |
Ok(ident) | |
} | |
fn token(&mut self, token: &'static str) -> Result<(), Error> { | |
self.whitespace(); | |
self.glued_token(token) | |
} | |
fn glued_token(&mut self, token: &'static str) -> Result<(), Error> { | |
if let Some(rest) = self.input.strip_prefix(token) { | |
self.input = rest; | |
Ok(()) | |
} else { | |
self.expected(token) | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment