Created
December 11, 2019 10:53
-
-
Save stmtk1/6eb78830fc68dc3b15ea73d3d6c3a1c8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use regex::Regex; | |
use std::cell::RefCell; | |
use std::rc::Rc; | |
fn main() { | |
println!("{:?}", Parser::new("(+ 1234 1)").token_tree); | |
} | |
/// A single lexical unit produced by the lexer.
///
/// `PartialEq`/`Eq` are derived so tokens can be compared directly
/// (for example in assertions or when matching on an expected token).
#[derive(Clone, Debug, PartialEq, Eq)]
enum Token {
    /// An unsigned decimal integer literal.
    Number(u64),
    /// An opening parenthesis, `(`.
    LParen,
    /// A closing parenthesis, `)`.
    RParen,
    /// The `+` sign.
    Plus,
    /// A run of one or more whitespace characters.
    Space,
    /// End-of-input marker.
    End,
}
struct Lexer { | |
tokens : Vec<Token>, | |
} | |
impl Lexer { | |
fn new(parsed: &str) -> Lexer { | |
let mut target = String::from(parsed); | |
let number_reg: Regex = Regex::new(r"^\d+").unwrap(); | |
let lparen_reg: Regex = Regex::new(r"^\(").unwrap(); | |
let rparen_reg: Regex = Regex::new(r"^\)").unwrap(); | |
let plus_reg: Regex = Regex::new(r"^\+").unwrap(); | |
let end_reg: Regex = Regex::new(r"^$").unwrap(); | |
let space_reg: Regex = Regex::new(r"^\s+").unwrap(); | |
let mut tokens = Vec::new(); | |
loop { | |
if let Some(val) = number_reg.captures(target.as_str()) { | |
tokens.push(Token::Number(val.get(0).unwrap().as_str().parse().unwrap())); | |
target = number_reg.replace(target.as_str(), "").into_owned(); | |
} else if plus_reg.is_match(target.as_str()) { | |
tokens.push(Token::Plus); | |
target = plus_reg.replace(target.as_str(), "").into_owned(); | |
} else if lparen_reg.is_match(target.as_str()) { | |
tokens.push(Token::LParen); | |
target = lparen_reg.replace(target.as_str(), "").into_owned(); | |
} else if rparen_reg.is_match(target.as_str()) { | |
tokens.push(Token::RParen); | |
target = rparen_reg.replace(target.as_str(), "").into_owned(); | |
} else if space_reg.is_match(target.as_str()) { | |
tokens.push(Token::Space); | |
target = space_reg.replace(target.as_str(), "").into_owned(); | |
} else if end_reg.is_match(target.as_str()) { | |
tokens.push(Token::End); | |
break; | |
} else { | |
panic!("parse error in lexer!"); | |
} | |
} | |
Lexer { | |
tokens, | |
} | |
} | |
} | |
#[derive(Clone, Debug)] | |
enum TokenTree { | |
Leaf(Token), | |
Branch(Vec<Rc<RefCell<TokenTree>>>), | |
} | |
impl TokenTree { | |
fn new(tokens: &Vec<Token>) -> TokenTree { | |
let (tree, index) = TokenTree::new_with_index(tokens, 0); | |
if tokens.len() <= index + 1 { | |
panic!("parse error"); | |
} | |
match tokens[index + 1] { | |
Token::End => (), | |
_ => { | |
println!("index: {}, len: {}", index, tokens.len()); | |
panic!("parse error!"); | |
}, | |
} | |
tree | |
} | |
fn new_leaf(token: Token) -> TokenTree { | |
TokenTree::Leaf(token) | |
} | |
fn new_with_index(tokens: &Vec<Token>, index: usize) -> (TokenTree, usize) { | |
let mut tree = Vec::new(); | |
let mut now = 0; | |
for (i, token) in (&tokens).into_iter().skip(index).enumerate() { | |
if i < now { | |
continue; | |
} | |
match token { | |
Token::LParen => { | |
let (branch, new_index) = TokenTree::new_with_index(tokens, i + 1); | |
now = new_index; | |
tree.push(Rc::new(RefCell::new(branch))); | |
}, | |
Token::RParen => { | |
return (TokenTree::Branch(tree), i); | |
}, | |
_ => { | |
let leaf = TokenTree::new_leaf(token.clone()); | |
tree.push(Rc::new(RefCell::new(leaf))); | |
}, | |
} | |
} | |
(TokenTree::Branch(tree), tokens.len() - 1) | |
} | |
} | |
struct Parser { | |
token_tree: TokenTree, | |
} | |
impl Parser { | |
fn new(parsed: &str) -> Parser { | |
let token_tree: TokenTree = TokenTree::new(&Lexer::new(parsed).tokens); | |
Parser { | |
token_tree | |
} | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment