Last active
May 10, 2020 22:12
-
-
Save xanathar/64896c8f979764bf572deda42abe1fbf to your computer and use it in GitHub Desktop.
Draft of RD parser for simple expressions, written in Rust
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::collections::VecDeque; | |
use crate::errors::{ Result, Error }; | |
use crate::langvalue::LangValue; | |
// Grammar | |
// | |
// name := <string> | |
// condop: GreaterThan | LessThan | GreaterThanEqual | LessThanEqual | Equals | NotEquals | Contains | |
// notop := Not | |
// logop := And | Or | |
// open := OpenSubExpr | |
// close := CloseSubExpr | |
// cond := name condop name | |
// subexpr := cond | open expr close | not expr | |
// expr := subexpr (logop subexpr)* | |
const MAX_RECURSION_DEPTH: u32 = 1000; | |
#[derive(Clone, Copy, Debug, PartialEq)] | |
enum TokenType { | |
Name, | |
GreaterThan, | |
LessThan, | |
GreaterThanEq, | |
LessThanEq, | |
Equals, | |
NotEquals, | |
Contains, | |
Not, | |
And, | |
Or, | |
OpenSubExpr, | |
CloseSubExpr, | |
} | |
struct Token { | |
pub token:TokenType, | |
pub value:String, | |
} | |
impl std::fmt::Display for Token { | |
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
write!(f, "'{}'", self.value) | |
} | |
} | |
impl std::fmt::Debug for Token { | |
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
write!(f, "'{}'", self.value) | |
} | |
} | |
struct Lexer { | |
input: VecDeque<String>, | |
buffer: VecDeque<Token>, | |
} | |
impl Lexer { | |
fn push(&mut self, token_str: String) { | |
self.buffer.push_back(Token { | |
value: token_str.clone(), | |
token: match token_str.as_str() { | |
"gt" => TokenType::GreaterThan, | |
"greater" => TokenType::GreaterThan, | |
"gte" => TokenType::GreaterThanEq, | |
"greatereq" => TokenType::GreaterThanEq, | |
"lt" => TokenType::LessThan, | |
"less" => TokenType::LessThan, | |
"lessthan" => TokenType::LessThan, | |
"lte" => TokenType::LessThanEq, | |
"lesseq" => TokenType::LessThanEq, | |
"lessthaneq" => TokenType::LessThanEq, | |
"is" => TokenType::Equals, | |
"equals" => TokenType::Equals, | |
"eq" => TokenType::Equals, | |
"=" => TokenType::Equals, | |
"isnot" => TokenType::NotEquals, | |
"not-equals" => TokenType::NotEquals, | |
"neq" => TokenType::NotEquals, | |
"!= " => TokenType::NotEquals, | |
"not" => TokenType::Not, | |
"!" => TokenType::Not, | |
"contains" => TokenType::Contains, | |
"and" => TokenType::And, | |
"or" => TokenType::Or, | |
"(" => TokenType::OpenSubExpr, | |
")" => TokenType::CloseSubExpr, | |
_ => TokenType::Name, | |
}, | |
}); | |
} | |
fn buffer_str(&mut self, expr: String) { | |
let mut cur_token = String::new(); | |
for c in expr.chars() { | |
if c == '(' || c == ')' { | |
if cur_token.len() > 0 { | |
self.push(cur_token.clone()); | |
cur_token.clear(); | |
} | |
self.push(c.to_string()); | |
} else if c == '=' { | |
if cur_token == "!" { | |
self.push("!=".to_string()); | |
cur_token.clear(); | |
} else { | |
if cur_token.len() > 0 { | |
self.push(cur_token.clone()); | |
cur_token.clear(); | |
} | |
self.push(c.to_string()); | |
} | |
} else { | |
cur_token.push(c); | |
} | |
} | |
if cur_token.len() > 0 { | |
self.push(cur_token.clone()); | |
} | |
} | |
fn feed_buffer(&mut self) { | |
if self.buffer.is_empty() && !self.input.is_empty() { | |
let token_str = self.input.pop_front().unwrap(); | |
self.buffer_str(token_str); | |
} | |
} | |
pub fn tokenize(expr: Vec<String>) -> Self { | |
Lexer { | |
buffer: VecDeque::new(), | |
input: VecDeque::from(expr), | |
} | |
} | |
pub fn peek(&mut self) -> Option<&Token> { | |
self.feed_buffer(); | |
self.buffer.front() | |
} | |
pub fn pop(&mut self) -> Result<Token> { | |
self.feed_buffer(); | |
match self.buffer.pop_front() { | |
Some(t) => Ok(t), | |
None => Err(Error::ExpressionSyntaxError("expression ends unexpectedly".to_string())) | |
} | |
} | |
} | |
#[derive(PartialEq)] | |
pub enum Expression { | |
GreaterThan(String, String), | |
LessThan(String, String), | |
GreaterThanEq(String, String), | |
LessThanEq(String, String), | |
Equals(String, String), | |
NotEquals(String, String), | |
Contains(String, String), | |
Not(Box<Expression>), | |
And(Box<Expression>, Box<Expression>), | |
Or(Box<Expression>, Box<Expression>), | |
} | |
impl std::fmt::Display for Expression { | |
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
match self { | |
Expression::GreaterThan(s1, s2) => write!(f, "({} > {})", s1, s2), | |
Expression::LessThan(s1, s2) => write!(f, "({} < {})", s1, s2), | |
Expression::GreaterThanEq(s1, s2) => write!(f, "({} >= {})", s1, s2), | |
Expression::LessThanEq(s1, s2) => write!(f, "({} <= {})", s1, s2), | |
Expression::Equals(s1, s2) => write!(f, "({} == {})", s1, s2), | |
Expression::NotEquals(s1, s2) => write!(f, "({} != {})", s1, s2), | |
Expression::Contains(s1, s2) => write!(f, "({} contains {})", s1, s2), | |
Expression::Not(e) => write!(f, "!({})", e), | |
Expression::And(e1, e2) => write!(f, "({} and {})", e1, e2), | |
Expression::Or(e1, e2) => write!(f, "({} or {})", e1, e2), | |
} | |
} | |
} | |
impl std::fmt::Debug for Expression { | |
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
write!(f, "{}", self) | |
} | |
} | |
impl Expression { | |
fn logop(lexer: &mut Lexer) -> Result<Token> { | |
let tkn = lexer.pop()?; | |
match tkn.token { | |
TokenType::And => Ok(tkn), | |
TokenType::Or => Ok(tkn), | |
_ => Err(Error::ExpressionSyntaxError(format!("logical operator expected, found {}", tkn))), | |
} | |
} | |
fn logexpr(left: Expression, op:Token, right:Expression) -> Result<Expression> { | |
match op.token { | |
TokenType::And => Ok(Expression::And(Box::new(left), Box::new(right))), | |
TokenType::Or => Ok(Expression::Or(Box::new(left), Box::new(right))), | |
_ => Err(Error::ExpressionSyntaxError(format!("logical operator expected, found {}", op))), | |
} | |
} | |
fn is_logop(tkn: Option<&Token>) -> bool { | |
if tkn.is_none() { | |
false | |
} else { | |
match tkn.unwrap().token { | |
TokenType::And => true, | |
TokenType::Or => true, | |
_ => false, | |
} | |
} | |
} | |
fn name(lexer: &mut Lexer) -> Result<String> { | |
let tkn = lexer.pop()?; | |
if tkn.token == TokenType::Name { | |
Ok(tkn.value) | |
} else { | |
Err(Error::ExpressionSyntaxError(format!("expected string or field, found {}", tkn))) | |
} | |
} | |
fn condop(lexer: &mut Lexer) -> Result<Token> { | |
let tkn = lexer.pop()?; | |
match tkn.token { | |
TokenType::GreaterThan => Ok(tkn), | |
TokenType::LessThan => Ok(tkn), | |
TokenType::GreaterThanEq => Ok(tkn), | |
TokenType::LessThanEq => Ok(tkn), | |
TokenType::Equals => Ok(tkn), | |
TokenType::NotEquals => Ok(tkn), | |
TokenType::Contains => Ok(tkn), | |
_ => Err(Error::ExpressionSyntaxError(format!("conditional operator expected, found {}", tkn))), | |
} | |
} | |
fn condexpr(left: String, op:Token, right:String) -> Result<Expression> { | |
match op.token { | |
TokenType::GreaterThan => Ok(Expression::GreaterThan(left, right)), | |
TokenType::LessThan => Ok(Expression::LessThan(left, right)), | |
TokenType::GreaterThanEq => Ok(Expression::GreaterThanEq(left, right)), | |
TokenType::LessThanEq => Ok(Expression::LessThanEq(left, right)), | |
TokenType::Equals => Ok(Expression::Equals(left, right)), | |
TokenType::NotEquals => Ok(Expression::NotEquals(left, right)), | |
TokenType::Contains => Ok(Expression::Contains(left, right)), | |
_ => Err(Error::ExpressionSyntaxError(format!("conditional operator expected, found {}", op))), | |
} | |
} | |
fn close_sub_expr(lexer: &mut Lexer) -> Result<()> { | |
let tkn = lexer.pop()?; | |
if tkn.token == TokenType::CloseSubExpr { | |
Ok(()) | |
} else { | |
Err(Error::ExpressionSyntaxError(format!("expected ')' found {}", tkn))) | |
} | |
} | |
fn subexpr(lexer: &mut Lexer, nest: u32) -> Result<Expression> { | |
if nest > MAX_RECURSION_DEPTH { | |
return Err(Error::ExpressionTooComplex); | |
} | |
let tkn = lexer.pop()?; | |
match tkn.token { | |
TokenType::Name => { | |
let op = Expression::condop(lexer)?; | |
let rname = Expression::name(lexer)?; | |
Ok(Expression::condexpr(tkn.value, op, rname)?) | |
}, | |
TokenType::OpenSubExpr => { | |
let subexpr = Expression::expr(lexer, nest + 1)?; | |
Expression::close_sub_expr(lexer)?; | |
Ok(subexpr) | |
}, | |
TokenType::Not => { | |
Ok(Expression::Not(Box::new(Expression::expr(lexer, nest + 1)?))) | |
}, | |
_ => Err(Error::ExpressionSyntaxError(format!("expression expected, found {}", tkn))), | |
} | |
} | |
fn expr(lexer: &mut Lexer, nest: u32) -> Result<Expression> { | |
if nest > MAX_RECURSION_DEPTH { | |
return Err(Error::ExpressionTooComplex); | |
} | |
let mut exprv = Expression::subexpr(lexer, nest + 1)?; | |
while Expression::is_logop(lexer.peek()) { | |
let op = Expression::logop(lexer)?; | |
let rexpr = Expression::subexpr(lexer, nest + 1)?; | |
exprv = Expression::logexpr(exprv, op, rexpr)?; | |
} | |
match lexer.peek() { | |
None => Ok(exprv), | |
Some(t) => match t.token { | |
TokenType::CloseSubExpr => Ok(exprv), | |
_ => Err(Error::ExpressionSyntaxError(format!("unexpected token {}", t))) | |
}, | |
} | |
} | |
pub fn parse_args(expression_args: Vec<String>) -> Result<Self> { | |
let mut lexer = Lexer::tokenize(expression_args); | |
let ast = Expression::expr(&mut lexer, 0)?; | |
if lexer.peek().is_none() { | |
Ok(ast) | |
} else { | |
Err(Error::ExpressionSyntaxError(format!("unexpected token '{}' at the end", lexer.peek().unwrap()))) | |
} | |
} | |
pub fn parse(expr_str: &str) -> Result<Self> { | |
let s = String::from(expr_str); | |
let mut v = Vec::new(); | |
for t in s.split_whitespace() { | |
v.push(String::from(t)); | |
} | |
Expression::parse_args(v) | |
} | |
pub fn eval(&self, evaluator: &dyn ExpressionEvaluator, item: &LangValue) -> Result<bool> { | |
self.eval_impl(evaluator, item, 0) | |
} | |
fn eval_impl(&self, evaluator: &dyn ExpressionEvaluator, item: &LangValue, nest: u32) -> Result<bool> { | |
if nest > MAX_RECURSION_DEPTH { | |
return Err(Error::ExpressionTooComplex); | |
} | |
match self { | |
Expression::Not(e) => Ok(!e.eval_impl(evaluator, item, nest + 1)?), | |
Expression::And(e1, e2) => Ok(e1.eval_impl(evaluator, item, nest + 1)? && e2.eval_impl(evaluator, item, nest + 1)?), | |
Expression::Or(e1, e2) => Ok(e1.eval_impl(evaluator, item, nest + 1)? || e2.eval_impl(evaluator, item, nest + 1)?), | |
_ => evaluator.eval(self, item) | |
} | |
} | |
} | |
pub trait ExpressionEvaluator { | |
fn eval(&self, expr: &Expression, item: &LangValue) -> Result<bool>; | |
} | |
#[test] | |
fn ql_cond_basic() { | |
let e1 = Expression::parse_args(vec!["a".to_string(), "gt".to_string(), "b".to_string()]).unwrap(); | |
println!("expr: {}", e1); | |
let e2 = Expression::GreaterThan("a".to_string(), "b".to_string()); | |
assert_eq!(e1, e2); | |
} | |
#[test] | |
fn ql_cond_basic2() { | |
let e1 = Expression::parse("a gt b").unwrap(); | |
println!("expr: {}", e1); | |
let e2 = Expression::GreaterThan("a".to_string(), "b".to_string()); | |
assert_eq!(e1, e2); | |
} | |
#[test] | |
fn ql_cond_logic() { | |
let e1 = Expression::parse("a gt b and c lt d").unwrap(); | |
println!("expr: {}", e1); | |
let e2 = Expression::And( | |
Box::new(Expression::GreaterThan("a".to_string(), "b".to_string())), | |
Box::new(Expression::LessThan("c".to_string(), "d".to_string())) | |
); | |
assert_eq!(e1, e2); | |
} | |
#[test] | |
fn ql_cond_logic_associativity() { | |
let e1 = Expression::parse("a gt b and c lt d or e eq f").unwrap(); | |
println!("expr: {}", e1); | |
let e2 = Expression::Or( | |
Box::new(Expression::And( | |
Box::new(Expression::GreaterThan("a".to_string(), "b".to_string())), | |
Box::new(Expression::LessThan("c".to_string(), "d".to_string())) | |
)), | |
Box::new(Expression::Equals("e".to_string(), "f".to_string())) | |
); | |
assert_eq!(e1, e2); | |
} | |
#[test] | |
fn ql_cond_logic_brkt1() { | |
let e1 = Expression::parse("a gt b and (c lt d or e eq f)").unwrap(); | |
println!("expr: {}", e1); | |
let e2 = Expression::And( | |
Box::new(Expression::GreaterThan("a".to_string(), "b".to_string())), | |
Box::new(Expression::Or( | |
Box::new(Expression::LessThan("c".to_string(), "d".to_string())), | |
Box::new(Expression::Equals("e".to_string(), "f".to_string())) | |
)), | |
); | |
assert_eq!(e1, e2); | |
} | |
#[test] | |
fn ql_cond_logic_brkt2() { | |
let e1 = Expression::parse("(a gt b) and not(c lt d or e eq f)").unwrap(); | |
println!("expr: {}", e1); | |
let e2 = Expression::And( | |
Box::new(Expression::GreaterThan("a".to_string(), "b".to_string())), | |
Box::new(Expression::Not( | |
Box::new(Expression::Or( | |
Box::new(Expression::LessThan("c".to_string(), "d".to_string())), | |
Box::new(Expression::Equals("e".to_string(), "f".to_string())) | |
)), | |
)), | |
); | |
assert_eq!(e1, e2); | |
} | |
#[test] | |
fn ql_cond_logic_incomplete() { | |
assert!(Expression::parse("(a gt b) and not(c lt d or e eq f").is_err()); | |
assert!(Expression::parse("(a gt b) and not(c lt d or e eq").is_err()); | |
assert!(Expression::parse("(a gt b) and not(c lt d or e").is_err()); | |
assert!(Expression::parse("(a gt b) and not(c lt d").is_err()); | |
assert!(Expression::parse("(a gt b) and not(c lt").is_err()); | |
assert!(Expression::parse("(a gt b) and not(c").is_err()); | |
assert!(Expression::parse("(a gt b) and not(").is_err()); | |
assert!(Expression::parse("(a gt b) and not").is_err()); | |
assert!(Expression::parse("(a gt b) and").is_err()); | |
assert!(Expression::parse("(a gt b").is_err()); | |
assert!(Expression::parse("a gt").is_err()); | |
assert!(Expression::parse("a").is_err()); | |
} | |
#[test] | |
fn ql_cond_logic_syntax_errors() { | |
assert!(Expression::parse("(a doh b) and not(c lt d or e eq f").is_err()); | |
assert!(Expression::parse("(a and b) and not(c lt d or e eq f").is_err()); | |
assert!(Expression::parse("(a not b) and not(c lt d or e eq f").is_err()); | |
assert!(Expression::parse("(a gt b) and eq(c lt d or e eq f").is_err()); | |
assert!(Expression::parse("(a gt b) eq not(c lt d or e eq f").is_err()); | |
assert!(Expression::parse("(a gt b) and f(c lt d or e eq f").is_err()); | |
assert!(Expression::parse("a gt b c").is_err()); | |
} | |
#[test] | |
fn ql_cond_logic_brkt_props() { | |
let e1 = Expression::parse(".a gt b and (.c lt d or e eq .f)").unwrap(); | |
println!("expr: {}", e1); | |
let e2 = Expression::And( | |
Box::new(Expression::GreaterThan(".a".to_string(), "b".to_string())), | |
Box::new(Expression::Or( | |
Box::new(Expression::LessThan(".c".to_string(), "d".to_string())), | |
Box::new(Expression::Equals("e".to_string(), ".f".to_string())) | |
)), | |
); | |
assert_eq!(e1, e2); | |
} |
New revision fixes associativity (grammar was right, parser was not!) and extra tokens at the end. Workable state now.
Added evaluation with external evaluator over some fictitious LangValue type.
gist updates will stop here.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If anyone reading here - several bugs, first of all logical operators are right associative, should be left associative. Brackets not tested yet.