Last active
March 4, 2023 19:55
-
-
Save iemelyanov/67a76fdc19e6bc2baf82968913fc3065 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::fmt::Display; | |
#[derive(Clone, Copy, PartialEq, Eq)] | |
enum Token { | |
Minus, | |
Plus, | |
Mul, | |
Div, | |
Num(i32), | |
Eof, | |
} | |
impl Display for Token { | |
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
match self { | |
Self::Div => write!(f, "/"), | |
Self::Mul => write!(f, "*"), | |
Self::Minus => write!(f, "-"), | |
Self::Plus => write!(f, "+"), | |
Self::Num(n) => write!(f, "{}", n), | |
_ => unreachable!(), | |
} | |
} | |
} | |
struct Lexer<'input> { | |
input: &'input [u8], | |
start: usize, | |
curent: usize, | |
} | |
impl<'input> Lexer<'input> { | |
fn new(input: &'input str) -> Self { | |
Self { | |
input: input.as_bytes(), | |
start: 0, | |
curent: 0, | |
} | |
} | |
fn is_at_end(&self) -> bool { | |
self.curent >= self.input.len() | |
} | |
fn peek(&self) -> u8 { | |
if self.is_at_end() { | |
b'\0' | |
} else { | |
self.input[self.curent] | |
} | |
} | |
fn advance(&mut self) -> u8 { | |
if self.is_at_end() { | |
return b'\0'; | |
} | |
let c = self.input[self.curent]; | |
self.curent += 1; | |
c | |
} | |
fn next(&mut self) -> Token { | |
self.start = self.curent; | |
let mut c = self.advance(); | |
while c.is_ascii_whitespace() { | |
c = self.advance(); | |
self.start = self.curent - 1; | |
} | |
match c { | |
b'+' => Token::Plus, | |
b'-' => Token::Minus, | |
b'*' => Token::Mul, | |
b'/' => Token::Div, | |
b'0'..=b'9' => { | |
while self.peek().is_ascii_digit() { | |
self.advance(); | |
} | |
let n = String::from_utf8_lossy(&self.input[self.start..self.curent]) | |
.parse::<i32>() | |
.unwrap(); | |
Token::Num(n) | |
} | |
_ => Token::Eof, | |
} | |
} | |
} | |
enum Expr { | |
Atom(Token), | |
Prefix { | |
op: Token, | |
rhs: Option<Box<Expr>>, | |
}, | |
Infix { | |
op: Token, | |
lhs: Option<Box<Expr>>, | |
rhs: Option<Box<Expr>>, | |
}, | |
} | |
#[derive(PartialEq, PartialOrd)] | |
enum P { | |
Lowest, | |
MinusPlus, | |
DivMul, | |
Prefix, | |
} | |
impl Display for Expr { | |
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
match self { | |
Self::Atom(v) => { | |
write!(f, "{}", v) | |
} | |
Self::Prefix { op, rhs } => { | |
write!(f, "({}{})", op, rhs.as_ref().unwrap()) | |
} | |
Self::Infix { op, lhs, rhs } => { | |
write!( | |
f, | |
"({} {} {})", | |
lhs.as_ref().unwrap(), | |
op, | |
rhs.as_ref().unwrap() | |
) | |
} | |
} | |
} | |
} | |
struct Parser<'input> { | |
lexer: Lexer<'input>, | |
cur_tok: Token, | |
peek_tok: Token, | |
trace_tab_cnt: usize, | |
} | |
impl<'input> Parser<'input> { | |
fn new(input: &'input str) -> Self { | |
let mut s = Self { | |
lexer: Lexer::new(input), | |
cur_tok: Token::Eof, | |
peek_tok: Token::Eof, | |
trace_tab_cnt: 0, | |
}; | |
s.next_token(); | |
s.next_token(); | |
s | |
} | |
fn next_token(&mut self) { | |
self.cur_tok = self.peek_tok; | |
self.peek_tok = self.lexer.next(); | |
} | |
fn trace_begin(&mut self, name: &str) { | |
println!("{:width$}BEGIN {}", "", name, width = self.trace_tab_cnt); | |
self.trace_tab_cnt += 4; | |
} | |
fn trace_end(&mut self, name: &str) { | |
self.trace_tab_cnt -= 4; | |
println!("{:width$}END {}", "", name, width = self.trace_tab_cnt); | |
} | |
fn parse_expr(&mut self, p: P) -> Option<Box<Expr>> { | |
self.trace_begin("parse_expr"); | |
let mut lhs = match self.cur_tok { | |
Token::Num(_) => self.parse_atom(), | |
Token::Minus | Token::Plus => self.parse_prefix(), | |
_ => None, | |
}; | |
while self.peek_tok != Token::Eof && p < Self::bp(self.peek_tok) { | |
self.next_token(); | |
lhs = self.parse_infix(lhs); | |
} | |
self.trace_end("parse_expr"); | |
lhs | |
} | |
fn parse_prefix(&mut self) -> Option<Box<Expr>> { | |
self.trace_begin("parse_prefix"); | |
let op = self.cur_tok; | |
self.next_token(); | |
let expr = Some(Box::new(Expr::Prefix { | |
op, | |
rhs: self.parse_expr(P::Prefix), | |
})); | |
self.trace_end("parse_prefix"); | |
expr | |
} | |
fn parse_infix(&mut self, lhs: Option<Box<Expr>>) -> Option<Box<Expr>> { | |
self.trace_begin("parse_infix"); | |
let op = self.cur_tok; | |
self.next_token(); | |
let rhs = self.parse_expr(Self::bp(op)); | |
let expr = Some(Box::new(Expr::Infix { | |
op, | |
lhs: lhs, | |
rhs: rhs, | |
})); | |
self.trace_end("parse_infix"); | |
expr | |
} | |
fn parse_atom(&mut self) -> Option<Box<Expr>> { | |
self.trace_begin("parse_atom"); | |
let r = Some(Box::new(Expr::Atom(self.cur_tok))); | |
self.trace_end("parse_atom"); | |
r | |
} | |
fn bp(t: Token) -> P { | |
match t { | |
Token::Plus | Token::Minus => P::MinusPlus, | |
Token::Mul | Token::Div => P::DivMul, | |
_ => P::Lowest, | |
} | |
} | |
} | |
fn eval(expr: &Option<Box<Expr>>) -> i32 { | |
match expr { | |
Some(ref expr) => match expr.as_ref() { | |
Expr::Atom(n) => match n { | |
Token::Num(ref n) => *n, | |
_ => unreachable!(), | |
}, | |
Expr::Infix { op, lhs, rhs } => match op { | |
Token::Minus => eval(&lhs) - eval(&rhs), | |
Token::Plus => eval(&lhs) + eval(&rhs), | |
Token::Div => eval(&lhs) / eval(&rhs), | |
Token::Mul => eval(&lhs) * eval(&rhs), | |
_ => unreachable!(), | |
}, | |
Expr::Prefix { op, rhs } => match op { | |
Token::Minus => -eval(&rhs), | |
_ => eval(&rhs), | |
}, | |
}, | |
None => 0, | |
} | |
} | |
fn main() { | |
let input = "-1 + 2 * 3"; | |
let mut parser = Parser::new(&input); | |
let expr = parser.parse_expr(P::Lowest); | |
print!("{} -> ", input); | |
expr.as_ref().map(|ast| { | |
print!("{}", ast); | |
}); | |
println!(" = {}", eval(&expr)); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import "fmt" | |
type tokenKind int | |
const ( | |
tokenAtom tokenKind = iota | |
tokenOp | |
tokenEof | |
) | |
type token struct { | |
literal byte | |
kind tokenKind | |
} | |
type lexer struct { | |
tokens []token | |
pos int | |
} | |
func lexerNew(input []byte) *lexer { | |
l := &lexer{} | |
for _, b := range input { | |
switch b { | |
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': | |
l.tokens = append(l.tokens, token{ | |
literal: b, | |
kind: tokenAtom, | |
}) | |
case ' ': | |
continue | |
default: | |
l.tokens = append(l.tokens, token{ | |
literal: b, | |
kind: tokenOp, | |
}) | |
} | |
} | |
return l | |
} | |
func (l *lexer) next() token { | |
if l.pos >= len(l.tokens) { | |
return token{kind: tokenEof} | |
} | |
t := l.tokens[l.pos] | |
l.pos++ | |
return t | |
} | |
func (l *lexer) peek() token { | |
if l.pos >= len(l.tokens) { | |
return token{kind: tokenEof} | |
} | |
return l.tokens[l.pos] | |
} | |
type sexpr interface { | |
String() string | |
} | |
type atom struct { | |
b byte | |
} | |
type cons struct { | |
b byte | |
sexprs []sexpr | |
} | |
func (a *atom) String() string { | |
return string(a.b) | |
} | |
func (c *cons) String() string { | |
t := fmt.Sprintf("(%c", c.b) | |
for _, e := range c.sexprs { | |
t += " " + e.String() | |
} | |
t += ")" | |
return t | |
} | |
func expr(input []byte) sexpr { | |
l := lexerNew(input) | |
return exprBp(l, 0) | |
} | |
func exprBp(l *lexer, bp int) sexpr { | |
var lhs sexpr | |
switch t := l.next(); t.kind { | |
case tokenAtom: | |
lhs = &atom{b: t.literal} | |
default: | |
panic(fmt.Sprintf("unexpected token '%c'", t.literal)) | |
} | |
loop: | |
for { | |
var op byte | |
switch t := l.peek(); t.kind { | |
case tokenOp: | |
op = t.literal | |
case tokenEof: | |
break loop | |
default: | |
panic(fmt.Sprintf("unexpected token '%c'", t.literal)) | |
} | |
var lbp, rbp int | |
switch op { | |
case '+', '-': | |
lbp, rbp = 1, 2 | |
case '*', '/': | |
lbp, rbp = 3, 4 | |
default: | |
panic(fmt.Sprintf("bad op '%c'", op)) | |
} | |
if lbp < bp { | |
break | |
} | |
l.next() | |
rhs := exprBp(l, rbp) | |
lhs = &cons{ | |
b: op, | |
sexprs: []sexpr{lhs, rhs}, | |
} | |
} | |
return lhs | |
} | |
func main() { | |
fmt.Println(expr([]byte("1 + 2 * 3 / 4 - 5 + 6"))) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment