Skip to content

Instantly share code, notes, and snippets.

@khornberg
Last active December 19, 2018 20:52
Show Gist options
  • Save khornberg/8ec377d6fa8a6d2fa11744cfa6a05831 to your computer and use it in GitHub Desktop.
Save khornberg/8ec377d6fa8a6d2fa11744cfa6a05831 to your computer and use it in GitHub Desktop.
Boolean expression matching
from ply import lex
class BooleanMatchError(Exception):
pass
reserved = {'AND': 'AND', 'OR': 'OR'}
math_tokens = [
'NUMBER',
'GREATER_THAN',
'LESS_THAN',
'KEY',
]
tokens = ['KEY_VALUE', 'LPAREN', 'RPAREN', 'OP'] + list(reserved.values()) + math_tokens
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_GREATER_THAN = r'\>'
t_LESS_THAN = r'\<'
def t_OP(t):
r'AND|OR'
t.type = reserved.get(t.value, 'OP')
return t
def t_KEY_VALUE(t):
r'([\w_\-.]+):("[\w_\-. ]+"|[\w_\-.]+)'
t.value = str(t.value)
return t
def t_NUMBER(t):
r'\d+'
t.value = int(t.value)
return t
def t_KEY(t):
r'([\w_\-.]+)'
t.value = str(t.value)
return t
# Define a rule so we can track line numbers
def t_newline(t):
r'\n+'
t.lexer.lineno += len(t.value)
# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t'
# Error handling rule
def t_error(t):
print("Illegal character '{}'".format(t.value[0]))
raise BooleanMatchError()
t.lexer.skip(1)
# Build the lexer
lexer = lex.lex()
import re
import operator
quoted_string = re.compile(r'"(?P<value>.+)"')
def walk(data, keys):
key = keys.pop(0)
_data = data[key]
if keys:
_data = walk(_data, keys)
return _data
def get_value(value):
is_quoted = re.match(quoted_string, value)
if is_quoted:
return is_quoted.group('value')
return value
def get_data_value(data, key):
if '.' in key:
keys = key.split('.')
return walk(data, keys)
return data[key]
def equal(a, b):
return a == b
def l_and(a, b):
return a and b
def l_or(a, b):
return a or b
def l_inequality(data, ast):
inequalities = {'>': operator.gt, '<': operator.lt}
value_1, relation, value_2 = ast
return inequalities[relation[0]](data[value_1], value_2)
def evaluate_key_value(data, ast):
key, value = ast
value = get_value(value)
data_value = get_data_value(data, key)
return equal(data_value, value)
def match(data, ast):
if len(ast) == 3:
left, inner, right = ast
if left == '(' and right == ')':
return match(data, inner)
if inner[0] == 'AND':
return l_and(match(data, left), match(data, right))
if inner[0] == 'OR':
return l_or(match(data, left), match(data, right))
if inner[0] in ['<', '>']:
return l_inequality(data, ast)
return evaluate_key_value(data, ast)
from ply import yacc
from lexer import tokens # noqa F401
precedence = (
('right', 'OP'),
('right', 'AND', 'OR'),
)
def p_term_op_term(p):
'''
term : term OP term
| term AND term
| term OR term
| term GREATER_THAN term
| term LESS_THAN term
'''
p[0] = (p[1], (p[2], ), p[3])
def p_term_factor(p):
'term : factor'
p[0] = p[1]
def p_term_factor_with_parens(p):
'factor : LPAREN term RPAREN'
p[0] = (p[1], p[2], p[3])
def p_factor_field(p):
'factor : KEY_VALUE'
key, value = p[1].split(':')
p[0] = (key, value)
def p_factor_number(p):
'factor : NUMBER'
p[0] = p[1]
def p_factor_key(p):
'factor : KEY'
p[0] = p[1]
def p_error(p):
print("Syntax error in input!", p)
parser = yacc.yacc()
import unittest
from lexer import BooleanMatchError
from parser import parser
from matcher import match
class TestParser(unittest.TestCase):
def test_parser_unknown_syntax_does_not_parse(self):
with self.assertRaises(BooleanMatchError):
parser.parse('f_z')
with self.assertRaises(BooleanMatchError):
parser.parse(':')
with self.assertRaises(BooleanMatchError):
parser.parse('a:')
with self.assertRaises(BooleanMatchError):
parser.parse(':1')
with self.assertRaises(BooleanMatchError):
parser.parse('BUT')
with self.assertRaises(BooleanMatchError):
parser.parse('f:1 x AND Y a:9')
with self.assertRaises(BooleanMatchError):
parser.parse('f:1 ANDY a:9')
with self.assertRaises(BooleanMatchError):
parser.parse('f:1 XOR a:9')
def test_parser_single_field_with_value(self):
data = 'f_z:123'
result = parser.parse(data)
self.assertEqual(('f_z', '123'), result)
def test_parser_single_field_with_dotted_path(self):
data = 'a.b:1'
result = parser.parse(data)
self.assertEqual(('a.b', '1'), result)
def test_parser_single_field_with_quoted_value(self):
data = 'f_z:"1"'
result = parser.parse(data)
self.assertEqual(('f_z', '"1"'), result)
def test_parser_single_field_with_quoted_value_and_space(self):
data = 'f_z:"1 3"'
result = parser.parse(data)
self.assertEqual(('f_z', '"1 3"'), result)
def test_parser_single_field_with_value_and_parens(self):
data = '(f_z:123)'
result = parser.parse(data)
self.assertEqual(('(', ('f_z', '123'), ')'), result)
def test_parser_single_field_with_value_and_nested_parens(self):
data = '((f_z:123))'
result = parser.parse(data)
self.assertEqual(('(', ('(', ('f_z', '123'), ')'), ')'), result)
def test_parser_logical_AND_value(self):
data = 'f_z:123 AND f_x:0'
result = parser.parse(data)
self.assertEqual((('f_z', '123'), ('AND', ), ('f_x', '0')), result)
def test_parser_logical_OR_value(self):
data = 'f_z:123 OR f_x:0'
result = parser.parse(data)
self.assertEqual((('f_z', '123'), ('OR', ), ('f_x', '0')), result)
def test_parser_term_with_parens(self):
data = '(f_z:123 OR f_x:0)'
result = parser.parse(data)
self.assertEqual(('(', (('f_z', '123'), ('OR', ), ('f_x', '0')), ')'), result)
def test_parser_term_with_OR(self):
data = '(f_z:123 OR f_x:0) OR f_a:1'
result = parser.parse(data)
self.assertEqual((('(', (('f_z', '123'), ('OR', ), ('f_x', '0')), ')'), ('OR', ), ('f_a', '1')), result)
def test_parser_term_with_AND(self):
data = '(f_z:123 AND f_x:0) AND f_a:1'
result = parser.parse(data)
self.assertEqual((('(', (('f_z', '123'), ('AND', ), ('f_x', '0')), ')'), ('AND', ), ('f_a', '1')), result)
def test_parser_term_with_AND_and_OR(self):
data = '(f_z:123 AND f_x:0) OR f_a:1'
result = parser.parse(data)
self.assertEqual((('(', (('f_z', '123'), ('AND', ), ('f_x', '0')), ')'), ('OR', ), ('f_a', '1')), result)
def test_parser_term_with_OR_and_AND(self):
data = '(f_z:123 OR f_x:0) AND f_a:1'
result = parser.parse(data)
self.assertEqual((('(', (('f_z', '123'), ('OR', ), ('f_x', '0')), ')'), ('AND', ), ('f_a', '1')), result)
def test_parser_two_terms(self):
data = '(f_z:123 OR f_x:0) AND (f_a:1 OR f_b:2)'
result = parser.parse(data)
self.assertEqual(
(
('(', (('f_z', '123'), ('OR', ), ('f_x', '0')), ')'), ('AND', ),
('(', (('f_a', '1'), ('OR', ), ('f_b', '2')), ')')
), result
)
def test_parser_three_terms(self):
data = 'a:1 OR b:2 AND c:3'
result = parser.parse(data)
self.assertEqual((('a', '1'), ('OR', ), (('b', '2'), ('AND', ), ('c', '3'))), result)
def test_parser_three_terms_switched_boolean_operators(self):
data = 'a:1 AND b:2 OR c:3'
result = parser.parse(data)
self.assertEqual((('a', '1'), ('AND', ), (('b', '2'), ('OR', ), ('c', '3'))), result)
def test_parser_three_terms_with_quotes(self):
data = 'a:"0 1" AND b:2 OR c:"3 4"'
result = parser.parse(data)
self.assertEqual((('a', '"0 1"'), ('AND', ), (('b', '2'), ('OR', ), ('c', '"3 4"'))), result)
def test_parser_greater_than(self):
data = 'a>1'
result = parser.parse(data)
self.assertEqual(('a', ('>', ), 1), result)
def test_parser_less_than(self):
data = 'a<1'
result = parser.parse(data)
self.assertEqual(('a', ('<', ), 1), result)
class TestMatcher(unittest.TestCase):
def test_single_value_greater_than_match(self):
data = {'a': 2}
ast = parser.parse('a>1')
self.assertTrue(match(data, ast))
def test_single_value_less_than_match(self):
data = {'a': 0}
ast = parser.parse('a<1')
self.assertTrue(match(data, ast))
def test_more_complex_value_less_than_match(self):
data = {'a': 0, 'b': 4, 'c': 3}
ast = parser.parse('(a<1) AND (b>5) OR ((c>2))')
self.assertTrue(match(data, ast))
data = {'a': 0, 'b': 6, 'c': 2}
ast = parser.parse('(a<1) AND (b>5) OR ((c>2))')
self.assertTrue(match(data, ast))
def test_single_key_value_match(self):
data = {'a': '1'}
ast = parser.parse('a:1')
self.assertTrue(match(data, ast))
def test_dotted_key_value_match(self):
data = {'a': {'b': '1'}}
ast = parser.parse('a.b:1')
self.assertTrue(match(data, ast))
def test_dotted_key_value_match_many_nested_levels(self):
data = {'a': {'b': {'c': {'d': '2'}}}}
ast = parser.parse('a.b.c.d:2')
self.assertTrue(match(data, ast))
def test_single_key_value_with_quotes_match(self):
data = {'a': '1 3'}
ast = parser.parse('a:"1 3"')
self.assertTrue(match(data, ast))
def test_single_key_value_with_parens(self):
data = {'a': '1'}
ast = parser.parse('(a:1)')
self.assertTrue(match(data, ast))
def test_single_key_value_with_nested_parens(self):
data = {'a': '1'}
ast = parser.parse('((a:1))')
self.assertTrue(match(data, ast))
def test_match_and(self):
data = {'a': '1', 'b': '2'}
ast = parser.parse('a:1 AND b:2')
self.assertTrue(match(data, ast))
def test_match_and_false(self):
data = {'a': '1', 'b': '3'}
ast = parser.parse('a:1 AND b:2')
self.assertFalse(match(data, ast))
def test_match_or(self):
data = {'a': '1', 'b': '3'}
ast = parser.parse('a:1 OR b:2')
self.assertTrue(match(data, ast))
def test_match_or_false(self):
data = {'a': '2', 'b': '3'}
ast = parser.parse('a:1 OR b:2')
self.assertFalse(match(data, ast))
def test_match_parens_and_parens(self):
data = {'a': '2', 'b': '3', 'c': '4'}
ast = parser.parse('(a:2 AND b:3) AND (a:2 AND c:4)')
self.assertTrue(match(data, ast))
def test_match_or_and_and(self):
data = {'a': '2', 'b': '3', 'c': '4'}
ast = parser.parse('(a:1 OR b:3) AND (a:2 AND c:4)')
self.assertTrue(match(data, ast))
def test_match_or_and_and_false(self):
data = {'a': '2', 'b': '3', 'c': '4'}
ast = parser.parse('(a:1 OR b:4) AND (a:2 AND c:4)')
self.assertFalse(match(data, ast))
def test_match_nested_or_and_and_or(self):
data = {'a': '2', 'b': '3', 'c': '4'}
ast = parser.parse('((a:1 OR b:3) AND (a:2 OR c:5)) AND ((a:1 OR b:3) AND c:4)')
self.assertTrue(match(data, ast))
def test_match_nested_or_and_and_or_false(self):
data = {'a': '2', 'b': '3', 'c': '4'}
ast = parser.parse('((a:1 OR b:3) AND (a:2 OR c:5)) AND ((a:1 OR b:3) AND c:5)')
self.assertFalse(match(data, ast))
if __name__ == '__main__':
unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment