Last active
April 4, 2022 06:36
-
-
Save amorphobia/96284562aa015a78473cd03c5698fadd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use crate::lexer::Lexer; | |
/// Token definitions shared by the lexer: tags, numeric literals,
/// identifiers/reserved words, and single-character tokens.
pub mod token {
    /// Tag kinds plus convenience constants for the token types.
    pub mod tag {
        /// Discriminates the kind of a scanned token.
        #[derive(Clone, Copy, Debug)]
        pub enum Tag {
            /// A single literal character, stored as its byte value.
            Char(u8),
            Num,
            Id,
            True,
            False,
        }

        // Short aliases so callers can write `NUM` instead of `Tag::Num`.
        pub const NUM: Tag = Tag::Num;
        pub const ID: Tag = Tag::Id;
        pub const TRUE: Tag = Tag::True;
        pub const FALSE: Tag = Tag::False;
    }

    use std::fmt::Debug;

    pub use tag::*;

    /// Common interface over every token kind: a token reports its tag.
    pub trait Token {
        fn tag(&self) -> Tag;
    }

    /// An integer literal token.
    #[derive(Clone, Copy, Debug)]
    pub struct Num {
        tag: Tag,
        value: i32,
    }

    impl Num {
        /// Builds a numeric token carrying `value`; the tag is always `NUM`.
        pub fn new(value: i32) -> Self {
            Self { tag: NUM, value }
        }

        /// The literal's integer value.
        pub fn value(&self) -> i32 {
            self.value
        }
    }

    impl Token for Num {
        fn tag(&self) -> Tag {
            self.tag
        }
    }

    /// An identifier or reserved word, carrying its spelling.
    #[derive(Clone, Debug)]
    pub struct Word {
        tag: Tag,
        lexeme: String,
    }

    impl Word {
        /// Builds a word token from a tag (e.g. `ID`, `TRUE`) and its spelling.
        pub fn new(tag: Tag, lexeme: &str) -> Self {
            Self {
                tag,
                lexeme: lexeme.to_owned(),
            }
        }

        /// The word's spelling as scanned from the input.
        pub fn lexeme(&self) -> &str {
            &self.lexeme
        }
    }

    impl Token for Word {
        fn tag(&self) -> Tag {
            self.tag
        }
    }

    /// Any other token, identified purely by its tag.
    #[derive(Clone, Copy, Debug)]
    pub struct Other {
        tag: Tag,
    }

    impl Other {
        /// Wraps a bare tag (typically `Tag::Char(..)`) as a token.
        pub fn new(tag: Tag) -> Self {
            Self { tag }
        }
    }

    impl Token for Other {
        fn tag(&self) -> Tag {
            self.tag
        }
    }
}
mod lexer { | |
use std::{ | |
collections::HashMap, | |
io::{Bytes, Error, ErrorKind, Read, Stdin}, | |
iter::Peekable, | |
}; | |
pub use crate::token::*; | |
#[derive(Debug)] | |
pub struct Lexer { | |
pub line: usize, | |
peek: char, | |
words: HashMap<String, Word>, | |
input: Peekable<Bytes<Stdin>>, | |
} | |
impl Lexer { | |
pub fn new() -> Self { | |
let mut lexer = Self { | |
line: 0, | |
peek: ' ', | |
words: HashMap::new(), | |
input: std::io::stdin().bytes().peekable(), | |
}; | |
lexer.reserve(Word::new(TRUE, "true")); | |
lexer.reserve(Word::new(FALSE, "false")); | |
lexer | |
} | |
fn reserve(&mut self, token: Word) { | |
self.words.insert(token.lexeme().to_string(), token); | |
} | |
fn peek_char(&mut self) -> Option<u8> { | |
if let Some(peek) = self.input.peek() { | |
match peek { | |
Ok(peek) => Some(*peek), | |
Err(_) => None, | |
} | |
} else { | |
None | |
} | |
} | |
fn next_char(&mut self) -> Option<u8> { | |
if let Some(next) = self.input.next() { | |
match next { | |
Ok(next) => Some(next), | |
Err(_) => None, | |
} | |
} else { | |
None | |
} | |
} | |
fn skip_blank_and_comments(&mut self) { | |
while let Some(peek) = self.next_char() { | |
self.peek = char::from(peek); | |
match self.peek { | |
' ' | '\t' => continue, | |
'\r' | '\n' => self.skip_new_line(), | |
'/' => match self.peek_char() { | |
Some(b'/') | Some(b'*') => self.skip_comments(), | |
_ => break, | |
}, | |
_ => break, | |
} | |
} | |
} | |
fn skip_new_line(&mut self) { | |
if self.peek == '\r' { | |
self.next_char(); | |
} | |
self.line += 1; | |
} | |
fn skip_comments(&mut self) { | |
if let Some(peek) = self.peek_char() { | |
match peek { | |
b'/' => self.skip_line(), | |
b'*' => { | |
self.next_char(); | |
while let Some(peek) = self.next_char() { | |
self.peek = char::from(peek); | |
match self.peek { | |
'\r' | '\n' => self.skip_new_line(), | |
'*' => { | |
if let Some(peek) = self.peek_char() { | |
match peek { | |
b'/' => { | |
self.next_char(); | |
break; | |
} | |
_ => (), | |
} | |
} else { | |
// EOF | |
todo!() | |
} | |
} | |
_ => (), | |
} | |
} | |
} | |
_ => unreachable!(), | |
} | |
} else { | |
unreachable!() | |
} | |
} | |
fn skip_line(&mut self) { | |
while let Some(peek) = self.next_char() { | |
self.peek = char::from(peek); | |
match self.peek { | |
'\r' | '\n' => { | |
self.skip_new_line(); | |
break; | |
} | |
_ => (), | |
} | |
} | |
} | |
pub fn scan(&mut self) -> Result<Box<dyn Token>, Error> { | |
self.skip_blank_and_comments(); | |
if self.peek.is_ascii_digit() { | |
let mut value = 0; | |
loop { | |
value = 10 * value | |
+ self | |
.peek | |
.to_digit(10) | |
.ok_or(Error::new(ErrorKind::Other, "Not digit"))?; | |
self.peek = if let Some(peek) = self.next_char() { | |
char::from(peek) | |
} else { | |
' ' | |
}; | |
if !self.peek.is_ascii_digit() { | |
break; | |
} | |
} | |
Ok(Box::new(Num::new( | |
value | |
.try_into() | |
.map_err(|err| Error::new(ErrorKind::Other, err))?, | |
))) | |
} else if self.peek.is_ascii_alphabetic() { | |
let mut buf = String::new(); | |
loop { | |
buf.push(self.peek); | |
self.peek = if let Some(peek) = self.next_char() { | |
char::from(peek) | |
} else { | |
' ' | |
}; | |
if !self.peek.is_ascii_alphanumeric() { | |
break; | |
} | |
} | |
Ok(Box::new( | |
self.words | |
.entry(buf.clone()) | |
.or_insert(Word::new(ID, &buf)) | |
.clone(), | |
)) | |
} else if "!<=>".contains(self.peek) { | |
match self.peek { | |
'!' => todo!(), | |
'<' => todo!(), | |
'=' => todo!(), | |
'>' => todo!(), | |
_ => unreachable!(), | |
} | |
} else { | |
let peek = self | |
.peek | |
.try_into() | |
.map_err(|err| Error::new(ErrorKind::Other, err))?; | |
self.peek = ' '; | |
Ok(Box::new(Other::new(Tag::Char(peek)))) | |
} | |
} | |
} | |
} | |
fn main() { | |
let mut lexer = Lexer::new(); | |
let result = lexer.scan().expect("Error"); | |
println!("{:?}", lexer); | |
println!("{:?}", result.tag()); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment