Created
June 1, 2024 03:40
-
-
Save MikuroXina/5f2504f215245980563357d08121532d to your computer and use it in GitHub Desktop.
The example implementation of full-scratch LR(0) parser.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//! The example implementation of full-scratch LR(0) parser, referenced [Wikipedia](https://en.wikipedia.org/wiki/LR_parser). | |
//! | |
//! # Rules | |
//! | |
//! There are non-terminal terms: Start, Expr and Bool. And there are terminal | |
//! terms: 0, 1, +. * and $ (end of input). | |
//! | |
//! 1. Introduction: `Start :- Expr $` | |
//! 2. Expr from multiplication: `Expr :- Expr * Bool` | |
//! 3. Expr from addition: `Expr :- Expr + Bool` | |
//! 4. Expr from Bool: `Expr :- Bool` | |
//! 5. Bool from 0: `Bool :- 0` | |
//! 6. Bool from 1: `Bool :- 1` | |
//! | |
//! # State closures | |
//! | |
//! Items denote the using rule and its matching position by a `.` symbol. A | |
//! closure can be collected from the term after the `.` symbol repeatedly. A | |
//! new state can make from the each items but advanced the `.` symbol to the | |
//! right. | |
//! | |
//! 1. Expression (from all rules) | |
//! - Start :- . Expr $ | |
//! - Expr :- . Expr * Bool | |
//! - Expr :- . Expr + Bool | |
//! - Expr :- . Bool | |
//! - Bool :- . 0 | |
//! - Bool :- . 1 | |
//! 2. None after zero (advanced `Bool :- . 0`) | |
//! - Bool :- 0 . | |
//! 3. None after one (advanced `Bool :- . 1`) | |
//! - Bool :- 1 . | |
//! 4. Operators after Expr (advanced `Start :- . Expr $`) | |
//! - Start :- Expr . $ | |
//! - Expr :- Expr . * Bool | |
//! - Expr :- Expr . + Bool | |
//! 5. None after `Bool` (advanced `Expr :- . Bool`) | |
//! - Expr :- Bool . | |
//! 6. `Bool` after operator `*` (advanced `Expr :- Expr . * Bool`) | |
//! - Expr :- Expr * . Bool | |
//! - Bool :- . 0 | |
//! - Bool :- . 1 | |
//! 7. `Bool` after operator `+` (advanced `Expr :- Expr . + Bool`) | |
//! - Expr :- Expr + . Bool | |
//! - Bool :- . 0 | |
//! - Bool :- . 1 | |
//! 8. None after `Expr * Bool` (advanced `Expr * . Bool`) | |
//! - Expr :- Expr * Bool . | |
//! 9. None after `Expr + Bool` (advanced `Expr + . Bool`) | |
//! - Expr :- Expr + Bool . | |
//! | |
//! # Actions between state and input | |
//! | |
//! The `action` function decides by following conditions: | |
//! | |
//! - When `Start`: | |
//! - If read `0`, then `Shift(ZeroIntoBool)`. | |
//! - If read `1`, then `Shift(OneIntoBool)`. | |
//! - When `ZeroIntoBool`: | |
//! - `Reduce(BoolFromZero)`. | |
//! - When `OneIntoBool`: | |
//! - `Reduce(BoolFromOne)`. | |
//! - When `Expr`: | |
//! - If read `*`, then `Shift(Multiply)`. | |
//! - If read `+`, then `Shift(Add)`. | |
//! - If read `$`, then `Accept`. | |
//! - When `BoolIntoExpr`: | |
//! - `Reduce(ExprFromBool)`. | |
//! - When `Multiply` or `Add` (same as `Start` by chance): | |
//! - If read `0`, `Shift(ZeroIntoBool)`. | |
//! - If read `1`, `Shift(OneIntoBool)`. | |
//! - When `MultiplyIntoExpr`: | |
//! - `Reduce(ExprFromExprMulBool)`. | |
//! - When `AddIntoExpr`: | |
//! - `Reduce(ExprFromExprAddBool)`. | |
//! - Otherwise it had syntax errors. | |
#[derive(Debug, Clone, Copy, PartialEq, Eq)] | |
enum Token { | |
End, | |
Asterisk, | |
Plus, | |
Zero, | |
One, | |
Expr, | |
Bool, | |
} | |
impl Token { | |
fn is_terminal(self) -> bool { | |
matches!( | |
self, | |
Token::Asterisk | Token::Plus | Token::Zero | Token::One | Token::End, | |
) | |
} | |
} | |
#[derive(Debug, Clone, Copy, PartialEq, Eq)] | |
enum State { | |
Start, | |
ZeroIntoBool, | |
OneIntoBool, | |
Expr, | |
BoolIntoExpr, | |
Multiply, | |
Add, | |
MultiplyIntoExpr, | |
AddIntoExpr, | |
} | |
#[derive(Debug, Clone, Copy, PartialEq, Eq)] | |
enum Rule { | |
ExprFromExprMulBool, | |
ExprFromExprAddBool, | |
ExprFromBool, | |
BoolFromZero, | |
BoolFromOne, | |
} | |
impl Rule { | |
fn apply(self, stack: &mut Vec<State>) { | |
match self { | |
Rule::ExprFromExprMulBool => { | |
assert!(stack.ends_with(&[State::Expr, State::Multiply, State::MultiplyIntoExpr])); | |
stack.pop(); | |
stack.pop(); | |
stack.pop(); | |
stack.push(goto(stack.last().unwrap(), Token::Expr)); | |
} | |
Rule::ExprFromExprAddBool => { | |
assert!(stack.ends_with(&[State::Expr, State::Add, State::AddIntoExpr])); | |
stack.pop(); | |
stack.pop(); | |
stack.pop(); | |
stack.push(goto(stack.last().unwrap(), Token::Expr)); | |
} | |
Rule::ExprFromBool => { | |
assert!(stack.ends_with(&[State::BoolIntoExpr])); | |
stack.pop(); | |
stack.push(goto(stack.last().unwrap(), Token::Expr)); | |
} | |
Rule::BoolFromZero => { | |
assert!(stack.ends_with(&[State::ZeroIntoBool])); | |
stack.pop(); | |
stack.push(goto(stack.last().unwrap(), Token::Bool)) | |
} | |
Rule::BoolFromOne => { | |
assert!(stack.ends_with(&[State::OneIntoBool])); | |
stack.pop(); | |
stack.push(goto(stack.last().unwrap(), Token::Bool)) | |
} | |
} | |
} | |
} | |
#[derive(Debug, Clone, Copy)] | |
enum Action { | |
Shift(State), | |
Reduce(Rule), | |
Accept, | |
} | |
fn goto(state: &State, read: Token) -> State { | |
match (state, read) { | |
(State::Start, Token::Expr) => State::Expr, | |
(State::Start, Token::Bool) => State::BoolIntoExpr, | |
(State::Multiply, Token::Bool) => State::MultiplyIntoExpr, | |
(State::Add, Token::Bool) => State::AddIntoExpr, | |
_ => panic!("syntax error"), | |
} | |
} | |
fn action(state: &State, read: Token) -> Action { | |
if !read.is_terminal() { | |
panic!("syntax error"); | |
} | |
match (state, read) { | |
(State::Start, Token::Zero) => Action::Shift(State::ZeroIntoBool), | |
(State::Start, Token::One) => Action::Shift(State::OneIntoBool), | |
(State::ZeroIntoBool, _) => Action::Reduce(Rule::BoolFromZero), | |
(State::OneIntoBool, _) => Action::Reduce(Rule::BoolFromOne), | |
(State::Expr, Token::Asterisk) => Action::Shift(State::Multiply), | |
(State::Expr, Token::Plus) => Action::Shift(State::Add), | |
(State::Expr, Token::End) => Action::Accept, | |
(State::BoolIntoExpr, _) => Action::Reduce(Rule::ExprFromBool), | |
(State::Multiply | State::Add, Token::Zero) => Action::Shift(State::ZeroIntoBool), | |
(State::Multiply | State::Add, Token::One) => Action::Shift(State::OneIntoBool), | |
(State::MultiplyIntoExpr, _) => Action::Reduce(Rule::ExprFromExprMulBool), | |
(State::AddIntoExpr, _) => Action::Reduce(Rule::ExprFromExprAddBool), | |
_ => panic!("syntax error"), | |
} | |
} | |
fn parse(input: impl IntoIterator<Item = Token>) -> Vec<Rule> { | |
let mut iter = input.into_iter().peekable(); | |
let mut stack = vec![State::Start]; | |
let mut history = vec![]; | |
loop { | |
let head = iter.peek().cloned().unwrap(); | |
match action(stack.last().unwrap(), head) { | |
Action::Shift(to) => { | |
iter.next(); | |
stack.push(to); | |
} | |
Action::Reduce(rule) => { | |
rule.apply(&mut stack); | |
history.push(rule); | |
} | |
Action::Accept => { | |
return history; | |
} | |
} | |
} | |
} | |
fn tokenize(source: &str) -> Vec<Token> { | |
let mut tokens = vec![]; | |
for ch in source.chars() { | |
if ch.is_whitespace() { | |
continue; | |
} | |
tokens.push(match ch { | |
'*' => Token::Asterisk, | |
'+' => Token::Plus, | |
'0' => Token::Zero, | |
'1' => Token::One, | |
_ => panic!("unknown token: {}", ch), | |
}) | |
} | |
tokens.push(Token::End); | |
tokens | |
} | |
#[test] | |
fn simple() { | |
let res = parse(tokenize("1 + 1 * 0")); | |
assert_eq!( | |
res, | |
vec![ | |
Rule::BoolFromOne, | |
Rule::ExprFromBool, | |
Rule::BoolFromOne, | |
Rule::ExprFromExprAddBool, | |
Rule::BoolFromZero, | |
Rule::ExprFromExprMulBool, | |
] | |
); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment