Last active
March 28, 2017 03:40
-
-
Save jsimmons/bd8ccef992f5a8d7247110b4c2abf455 to your computer and use it in GitHub Desktop.
Writing some Stupid Stuff in rust for giggles.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::iter::Peekable;
use std::str::FromStr;
/// Handle for an interned string.
///
/// Wraps the index of the string inside the `StringPool` that produced it, so
/// two atoms from the same pool are equal exactly when they name the same
/// string. `Hash` is derived so atoms can be used as hash-map / hash-set keys.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
struct Atom(usize);
struct StringPool { | |
symbols: Vec<String> | |
} | |
impl StringPool { | |
fn new() -> StringPool { | |
StringPool { | |
symbols: Vec::new() | |
} | |
} | |
fn intern(&mut self, string: &str) -> Atom { | |
match self.symbols.iter().position(|&ref x| x == string) { | |
Some(i) => Atom(i), | |
None => { | |
self.symbols.push(string.to_string()); | |
Atom(self.symbols.len() - 1) | |
} | |
} | |
} | |
} | |
// Lexer
//
#[derive(Copy, Clone, Debug, PartialEq, Eq)] | |
enum Token { | |
Number(u64), | |
Variable(Atom), | |
LParen, | |
RParen, | |
LBracket, | |
RBracket, | |
LBrace, | |
RBrace, | |
If, | |
Then, | |
Else, | |
Elif, | |
End, | |
While, | |
Return | |
} | |
struct Tokenizer<I: Iterator<Item=char>> { | |
iter: Peekable<I>, | |
strings: StringPool | |
} | |
impl <I: Iterator<Item=char>>Tokenizer<I> { | |
fn new(iter: I, strings: StringPool) -> Tokenizer<I> { | |
Tokenizer { | |
iter: iter.peekable(), | |
strings: strings | |
} | |
} | |
} | |
impl <I: Iterator<Item=char>> Iterator for Tokenizer<I> { | |
type Item = Token; | |
fn next(&mut self) -> Option<Token> { | |
match self.iter.peek().cloned() { | |
Some('(') => { | |
self.iter.next(); | |
Some(Token::LParen) | |
}, | |
Some(')') => { | |
self.iter.next(); | |
Some(Token::RParen) | |
}, | |
Some('{') => { | |
self.iter.next(); | |
Some(Token::LBracket) | |
}, | |
Some('}') => { | |
self.iter.next(); | |
Some(Token::RBracket) | |
}, | |
Some(x) if x.is_numeric() => { | |
let num_string = self.iter.by_ref().take_while(|& c| c.is_numeric()).collect::<String>(); | |
Some(Token::Number(u64::from_str(&num_string).unwrap_or(0))) | |
} | |
Some(x) if !x.is_whitespace() => { | |
// Using iterators on top of iterators means we have to make a new string here which is really dumb. | |
// But whatever. | |
let string = self.iter.by_ref().take_while(|& c| !c.is_whitespace()).collect::<String>(); | |
let atom = self.strings.intern(&string); | |
Some(Token::Variable(atom)) | |
} | |
Some(_) => { | |
self.iter.next(); | |
self.next() | |
}, | |
None => None | |
} | |
} | |
} | |
fn main() { | |
println!("Hello, world!"); | |
let mut pool = StringPool::new(); | |
let hello = pool.intern("Hello"); | |
let world = pool.intern("World"); | |
assert_eq!(hello, pool.intern("Hello")); | |
let code = "()()(){} Hello"; | |
let mut tokenizer = Tokenizer::new(code.chars(), pool); | |
assert_eq!(tokenizer.next(), Some(Token::LParen)); | |
assert_eq!(tokenizer.next(), Some(Token::RParen)); | |
assert_eq!(tokenizer.next(), Some(Token::LParen)); | |
assert_eq!(tokenizer.next(), Some(Token::RParen)); | |
assert_eq!(tokenizer.next(), Some(Token::LParen)); | |
assert_eq!(tokenizer.next(), Some(Token::RParen)); | |
assert_eq!(tokenizer.next(), Some(Token::LBracket)); | |
assert_eq!(tokenizer.next(), Some(Token::RBracket)); | |
assert_eq!(tokenizer.next(), Some(Token::Variable(hello))); | |
assert_eq!(tokenizer.next(), None); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment