Skip to content

Instantly share code, notes, and snippets.

@MikuroXina
Created June 1, 2024 03:40
Show Gist options
  • Save MikuroXina/5f2504f215245980563357d08121532d to your computer and use it in GitHub Desktop.
Save MikuroXina/5f2504f215245980563357d08121532d to your computer and use it in GitHub Desktop.
The example implementation of full-scratch LR(0) parser.
//! The example implementation of full-scratch LR(0) parser, referenced [Wikipedia](https://en.wikipedia.org/wiki/LR_parser).
//!
//! # Rules
//!
//! There are non-terminal terms: Start, Expr and Bool. And there are terminal
//! terms: 0, 1, +. * and $ (end of input).
//!
//! 1. Introduction: `Start :- Expr $`
//! 2. Expr from multiplication: `Expr :- Expr * Bool`
//! 3. Expr from addition: `Expr :- Expr + Bool`
//! 4. Expr from Bool: `Expr :- Bool`
//! 5. Bool from 0: `Bool :- 0`
//! 6. Bool from 1: `Bool :- 1`
//!
//! # State closures
//!
//! Items denote the using rule and its matching position by a `.` symbol. A
//! closure can be collected from the term after the `.` symbol repeatedly. A
//! new state can make from the each items but advanced the `.` symbol to the
//! right.
//!
//! 1. Expression (from all rules)
//! - Start :- . Expr $
//! - Expr :- . Expr * Bool
//! - Expr :- . Expr + Bool
//! - Expr :- . Bool
//! - Bool :- . 0
//! - Bool :- . 1
//! 2. None after zero (advanced `Bool :- . 0`)
//! - Bool :- 0 .
//! 3. None after one (advanced `Bool :- . 1`)
//! - Bool :- 1 .
//! 4. Operators after Expr (advanced `Start :- . Expr $`)
//! - Start :- Expr . $
//! - Expr :- Expr . * Bool
//! - Expr :- Expr . + Bool
//! 5. None after `Bool` (advanced `Expr :- . Bool`)
//! - Expr :- Bool .
//! 6. `Bool` after operator `*` (advanced `Expr :- Expr . * Bool`)
//! - Expr :- Expr * . Bool
//! - Bool :- . 0
//! - Bool :- . 1
//! 7. `Bool` after operator `+` (advanced `Expr :- Expr . + Bool`)
//! - Expr :- Expr + . Bool
//! - Bool :- . 0
//! - Bool :- . 1
//! 8. None after `Expr * Bool` (advanced `Expr * . Bool`)
//! - Expr :- Expr * Bool .
//! 9. None after `Expr + Bool` (advanced `Expr + . Bool`)
//! - Expr :- Expr + Bool .
//!
//! # Actions between state and input
//!
//! The `action` function decides by following conditions:
//!
//! - When `Start`:
//! - If read `0`, then `Shift(ZeroIntoBool)`.
//! - If read `1`, then `Shift(OneIntoBool)`.
//! - When `ZeroIntoBool`:
//! - `Reduce(BoolFromZero)`.
//! - When `OneIntoBool`:
//! - `Reduce(BoolFromOne)`.
//! - When `Expr`:
//! - If read `*`, then `Shift(Multiply)`.
//! - If read `+`, then `Shift(Add)`.
//! - If read `$`, then `Accept`.
//! - When `BoolIntoExpr`:
//! - `Reduce(ExprFromBool)`.
//! - When `Multiply` or `Add` (same as `Start` by chance):
//! - If read `0`, `Shift(ZeroIntoBool)`.
//! - If read `1`, `Shift(OneIntoBool)`.
//! - When `MultiplyIntoExpr`:
//! - `Reduce(ExprFromExprMulBool)`.
//! - When `AddIntoExpr`:
//! - `Reduce(ExprFromExprAddBool)`.
//! - Otherwise it had syntax errors.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Token {
End,
Asterisk,
Plus,
Zero,
One,
Expr,
Bool,
}
impl Token {
fn is_terminal(self) -> bool {
matches!(
self,
Token::Asterisk | Token::Plus | Token::Zero | Token::One | Token::End,
)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
Start,
ZeroIntoBool,
OneIntoBool,
Expr,
BoolIntoExpr,
Multiply,
Add,
MultiplyIntoExpr,
AddIntoExpr,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Rule {
ExprFromExprMulBool,
ExprFromExprAddBool,
ExprFromBool,
BoolFromZero,
BoolFromOne,
}
impl Rule {
fn apply(self, stack: &mut Vec<State>) {
match self {
Rule::ExprFromExprMulBool => {
assert!(stack.ends_with(&[State::Expr, State::Multiply, State::MultiplyIntoExpr]));
stack.pop();
stack.pop();
stack.pop();
stack.push(goto(stack.last().unwrap(), Token::Expr));
}
Rule::ExprFromExprAddBool => {
assert!(stack.ends_with(&[State::Expr, State::Add, State::AddIntoExpr]));
stack.pop();
stack.pop();
stack.pop();
stack.push(goto(stack.last().unwrap(), Token::Expr));
}
Rule::ExprFromBool => {
assert!(stack.ends_with(&[State::BoolIntoExpr]));
stack.pop();
stack.push(goto(stack.last().unwrap(), Token::Expr));
}
Rule::BoolFromZero => {
assert!(stack.ends_with(&[State::ZeroIntoBool]));
stack.pop();
stack.push(goto(stack.last().unwrap(), Token::Bool))
}
Rule::BoolFromOne => {
assert!(stack.ends_with(&[State::OneIntoBool]));
stack.pop();
stack.push(goto(stack.last().unwrap(), Token::Bool))
}
}
}
}
#[derive(Debug, Clone, Copy)]
enum Action {
Shift(State),
Reduce(Rule),
Accept,
}
fn goto(state: &State, read: Token) -> State {
match (state, read) {
(State::Start, Token::Expr) => State::Expr,
(State::Start, Token::Bool) => State::BoolIntoExpr,
(State::Multiply, Token::Bool) => State::MultiplyIntoExpr,
(State::Add, Token::Bool) => State::AddIntoExpr,
_ => panic!("syntax error"),
}
}
fn action(state: &State, read: Token) -> Action {
if !read.is_terminal() {
panic!("syntax error");
}
match (state, read) {
(State::Start, Token::Zero) => Action::Shift(State::ZeroIntoBool),
(State::Start, Token::One) => Action::Shift(State::OneIntoBool),
(State::ZeroIntoBool, _) => Action::Reduce(Rule::BoolFromZero),
(State::OneIntoBool, _) => Action::Reduce(Rule::BoolFromOne),
(State::Expr, Token::Asterisk) => Action::Shift(State::Multiply),
(State::Expr, Token::Plus) => Action::Shift(State::Add),
(State::Expr, Token::End) => Action::Accept,
(State::BoolIntoExpr, _) => Action::Reduce(Rule::ExprFromBool),
(State::Multiply | State::Add, Token::Zero) => Action::Shift(State::ZeroIntoBool),
(State::Multiply | State::Add, Token::One) => Action::Shift(State::OneIntoBool),
(State::MultiplyIntoExpr, _) => Action::Reduce(Rule::ExprFromExprMulBool),
(State::AddIntoExpr, _) => Action::Reduce(Rule::ExprFromExprAddBool),
_ => panic!("syntax error"),
}
}
fn parse(input: impl IntoIterator<Item = Token>) -> Vec<Rule> {
let mut iter = input.into_iter().peekable();
let mut stack = vec![State::Start];
let mut history = vec![];
loop {
let head = iter.peek().cloned().unwrap();
match action(stack.last().unwrap(), head) {
Action::Shift(to) => {
iter.next();
stack.push(to);
}
Action::Reduce(rule) => {
rule.apply(&mut stack);
history.push(rule);
}
Action::Accept => {
return history;
}
}
}
}
fn tokenize(source: &str) -> Vec<Token> {
let mut tokens = vec![];
for ch in source.chars() {
if ch.is_whitespace() {
continue;
}
tokens.push(match ch {
'*' => Token::Asterisk,
'+' => Token::Plus,
'0' => Token::Zero,
'1' => Token::One,
_ => panic!("unknown token: {}", ch),
})
}
tokens.push(Token::End);
tokens
}
#[test]
fn simple() {
let res = parse(tokenize("1 + 1 * 0"));
assert_eq!(
res,
vec![
Rule::BoolFromOne,
Rule::ExprFromBool,
Rule::BoolFromOne,
Rule::ExprFromExprAddBool,
Rule::BoolFromZero,
Rule::ExprFromExprMulBool,
]
);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment