Last active
December 19, 2018 20:52
-
-
Save khornberg/8ec377d6fa8a6d2fa11744cfa6a05831 to your computer and use it in GitHub Desktop.
Boolean expression matching
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ply import lex | |
class BooleanMatchError(Exception): | |
pass | |
reserved = {'AND': 'AND', 'OR': 'OR'} | |
math_tokens = [ | |
'NUMBER', | |
'GREATER_THAN', | |
'LESS_THAN', | |
'KEY', | |
] | |
tokens = ['KEY_VALUE', 'LPAREN', 'RPAREN', 'OP'] + list(reserved.values()) + math_tokens | |
t_LPAREN = r'\(' | |
t_RPAREN = r'\)' | |
t_GREATER_THAN = r'\>' | |
t_LESS_THAN = r'\<' | |
def t_OP(t): | |
r'AND|OR' | |
t.type = reserved.get(t.value, 'OP') | |
return t | |
def t_KEY_VALUE(t): | |
r'([\w_\-.]+):("[\w_\-. ]+"|[\w_\-.]+)' | |
t.value = str(t.value) | |
return t | |
def t_NUMBER(t): | |
r'\d+' | |
t.value = int(t.value) | |
return t | |
def t_KEY(t): | |
r'([\w_\-.]+)' | |
t.value = str(t.value) | |
return t | |
# Define a rule so we can track line numbers | |
def t_newline(t): | |
r'\n+' | |
t.lexer.lineno += len(t.value) | |
# A string containing ignored characters (spaces and tabs) | |
t_ignore = ' \t' | |
# Error handling rule | |
def t_error(t): | |
print("Illegal character '{}'".format(t.value[0])) | |
raise BooleanMatchError() | |
t.lexer.skip(1) | |
# Build the lexer | |
lexer = lex.lex() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import operator | |
quoted_string = re.compile(r'"(?P<value>.+)"') | |
def walk(data, keys): | |
key = keys.pop(0) | |
_data = data[key] | |
if keys: | |
_data = walk(_data, keys) | |
return _data | |
def get_value(value): | |
is_quoted = re.match(quoted_string, value) | |
if is_quoted: | |
return is_quoted.group('value') | |
return value | |
def get_data_value(data, key): | |
if '.' in key: | |
keys = key.split('.') | |
return walk(data, keys) | |
return data[key] | |
def equal(a, b): | |
return a == b | |
def l_and(a, b): | |
return a and b | |
def l_or(a, b): | |
return a or b | |
def l_inequality(data, ast): | |
inequalities = {'>': operator.gt, '<': operator.lt} | |
value_1, relation, value_2 = ast | |
return inequalities[relation[0]](data[value_1], value_2) | |
def evaluate_key_value(data, ast): | |
key, value = ast | |
value = get_value(value) | |
data_value = get_data_value(data, key) | |
return equal(data_value, value) | |
def match(data, ast): | |
if len(ast) == 3: | |
left, inner, right = ast | |
if left == '(' and right == ')': | |
return match(data, inner) | |
if inner[0] == 'AND': | |
return l_and(match(data, left), match(data, right)) | |
if inner[0] == 'OR': | |
return l_or(match(data, left), match(data, right)) | |
if inner[0] in ['<', '>']: | |
return l_inequality(data, ast) | |
return evaluate_key_value(data, ast) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ply import yacc | |
from lexer import tokens # noqa F401 | |
precedence = ( | |
('right', 'OP'), | |
('right', 'AND', 'OR'), | |
) | |
def p_term_op_term(p): | |
''' | |
term : term OP term | |
| term AND term | |
| term OR term | |
| term GREATER_THAN term | |
| term LESS_THAN term | |
''' | |
p[0] = (p[1], (p[2], ), p[3]) | |
def p_term_factor(p): | |
'term : factor' | |
p[0] = p[1] | |
def p_term_factor_with_parens(p): | |
'factor : LPAREN term RPAREN' | |
p[0] = (p[1], p[2], p[3]) | |
def p_factor_field(p): | |
'factor : KEY_VALUE' | |
key, value = p[1].split(':') | |
p[0] = (key, value) | |
def p_factor_number(p): | |
'factor : NUMBER' | |
p[0] = p[1] | |
def p_factor_key(p): | |
'factor : KEY' | |
p[0] = p[1] | |
def p_error(p): | |
print("Syntax error in input!", p) | |
parser = yacc.yacc() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unittest | |
from lexer import BooleanMatchError | |
from parser import parser | |
from matcher import match | |
class TestParser(unittest.TestCase): | |
def test_parser_unknown_syntax_does_not_parse(self): | |
with self.assertRaises(BooleanMatchError): | |
parser.parse('f_z') | |
with self.assertRaises(BooleanMatchError): | |
parser.parse(':') | |
with self.assertRaises(BooleanMatchError): | |
parser.parse('a:') | |
with self.assertRaises(BooleanMatchError): | |
parser.parse(':1') | |
with self.assertRaises(BooleanMatchError): | |
parser.parse('BUT') | |
with self.assertRaises(BooleanMatchError): | |
parser.parse('f:1 x AND Y a:9') | |
with self.assertRaises(BooleanMatchError): | |
parser.parse('f:1 ANDY a:9') | |
with self.assertRaises(BooleanMatchError): | |
parser.parse('f:1 XOR a:9') | |
def test_parser_single_field_with_value(self): | |
data = 'f_z:123' | |
result = parser.parse(data) | |
self.assertEqual(('f_z', '123'), result) | |
def test_parser_single_field_with_dotted_path(self): | |
data = 'a.b:1' | |
result = parser.parse(data) | |
self.assertEqual(('a.b', '1'), result) | |
def test_parser_single_field_with_quoted_value(self): | |
data = 'f_z:"1"' | |
result = parser.parse(data) | |
self.assertEqual(('f_z', '"1"'), result) | |
def test_parser_single_field_with_quoted_value_and_space(self): | |
data = 'f_z:"1 3"' | |
result = parser.parse(data) | |
self.assertEqual(('f_z', '"1 3"'), result) | |
def test_parser_single_field_with_value_and_parens(self): | |
data = '(f_z:123)' | |
result = parser.parse(data) | |
self.assertEqual(('(', ('f_z', '123'), ')'), result) | |
def test_parser_single_field_with_value_and_nested_parens(self): | |
data = '((f_z:123))' | |
result = parser.parse(data) | |
self.assertEqual(('(', ('(', ('f_z', '123'), ')'), ')'), result) | |
def test_parser_logical_AND_value(self): | |
data = 'f_z:123 AND f_x:0' | |
result = parser.parse(data) | |
self.assertEqual((('f_z', '123'), ('AND', ), ('f_x', '0')), result) | |
def test_parser_logical_OR_value(self): | |
data = 'f_z:123 OR f_x:0' | |
result = parser.parse(data) | |
self.assertEqual((('f_z', '123'), ('OR', ), ('f_x', '0')), result) | |
def test_parser_term_with_parens(self): | |
data = '(f_z:123 OR f_x:0)' | |
result = parser.parse(data) | |
self.assertEqual(('(', (('f_z', '123'), ('OR', ), ('f_x', '0')), ')'), result) | |
def test_parser_term_with_OR(self): | |
data = '(f_z:123 OR f_x:0) OR f_a:1' | |
result = parser.parse(data) | |
self.assertEqual((('(', (('f_z', '123'), ('OR', ), ('f_x', '0')), ')'), ('OR', ), ('f_a', '1')), result) | |
def test_parser_term_with_AND(self): | |
data = '(f_z:123 AND f_x:0) AND f_a:1' | |
result = parser.parse(data) | |
self.assertEqual((('(', (('f_z', '123'), ('AND', ), ('f_x', '0')), ')'), ('AND', ), ('f_a', '1')), result) | |
def test_parser_term_with_AND_and_OR(self): | |
data = '(f_z:123 AND f_x:0) OR f_a:1' | |
result = parser.parse(data) | |
self.assertEqual((('(', (('f_z', '123'), ('AND', ), ('f_x', '0')), ')'), ('OR', ), ('f_a', '1')), result) | |
def test_parser_term_with_OR_and_AND(self): | |
data = '(f_z:123 OR f_x:0) AND f_a:1' | |
result = parser.parse(data) | |
self.assertEqual((('(', (('f_z', '123'), ('OR', ), ('f_x', '0')), ')'), ('AND', ), ('f_a', '1')), result) | |
def test_parser_two_terms(self): | |
data = '(f_z:123 OR f_x:0) AND (f_a:1 OR f_b:2)' | |
result = parser.parse(data) | |
self.assertEqual( | |
( | |
('(', (('f_z', '123'), ('OR', ), ('f_x', '0')), ')'), ('AND', ), | |
('(', (('f_a', '1'), ('OR', ), ('f_b', '2')), ')') | |
), result | |
) | |
def test_parser_three_terms(self): | |
data = 'a:1 OR b:2 AND c:3' | |
result = parser.parse(data) | |
self.assertEqual((('a', '1'), ('OR', ), (('b', '2'), ('AND', ), ('c', '3'))), result) | |
def test_parser_three_terms_switched_boolean_operators(self): | |
data = 'a:1 AND b:2 OR c:3' | |
result = parser.parse(data) | |
self.assertEqual((('a', '1'), ('AND', ), (('b', '2'), ('OR', ), ('c', '3'))), result) | |
def test_parser_three_terms_with_quotes(self): | |
data = 'a:"0 1" AND b:2 OR c:"3 4"' | |
result = parser.parse(data) | |
self.assertEqual((('a', '"0 1"'), ('AND', ), (('b', '2'), ('OR', ), ('c', '"3 4"'))), result) | |
def test_parser_greater_than(self): | |
data = 'a>1' | |
result = parser.parse(data) | |
self.assertEqual(('a', ('>', ), 1), result) | |
def test_parser_less_than(self): | |
data = 'a<1' | |
result = parser.parse(data) | |
self.assertEqual(('a', ('<', ), 1), result) | |
class TestMatcher(unittest.TestCase): | |
def test_single_value_greater_than_match(self): | |
data = {'a': 2} | |
ast = parser.parse('a>1') | |
self.assertTrue(match(data, ast)) | |
def test_single_value_less_than_match(self): | |
data = {'a': 0} | |
ast = parser.parse('a<1') | |
self.assertTrue(match(data, ast)) | |
def test_more_complex_value_less_than_match(self): | |
data = {'a': 0, 'b': 4, 'c': 3} | |
ast = parser.parse('(a<1) AND (b>5) OR ((c>2))') | |
self.assertTrue(match(data, ast)) | |
data = {'a': 0, 'b': 6, 'c': 2} | |
ast = parser.parse('(a<1) AND (b>5) OR ((c>2))') | |
self.assertTrue(match(data, ast)) | |
def test_single_key_value_match(self): | |
data = {'a': '1'} | |
ast = parser.parse('a:1') | |
self.assertTrue(match(data, ast)) | |
def test_dotted_key_value_match(self): | |
data = {'a': {'b': '1'}} | |
ast = parser.parse('a.b:1') | |
self.assertTrue(match(data, ast)) | |
def test_dotted_key_value_match_many_nested_levels(self): | |
data = {'a': {'b': {'c': {'d': '2'}}}} | |
ast = parser.parse('a.b.c.d:2') | |
self.assertTrue(match(data, ast)) | |
def test_single_key_value_with_quotes_match(self): | |
data = {'a': '1 3'} | |
ast = parser.parse('a:"1 3"') | |
self.assertTrue(match(data, ast)) | |
def test_single_key_value_with_parens(self): | |
data = {'a': '1'} | |
ast = parser.parse('(a:1)') | |
self.assertTrue(match(data, ast)) | |
def test_single_key_value_with_nested_parens(self): | |
data = {'a': '1'} | |
ast = parser.parse('((a:1))') | |
self.assertTrue(match(data, ast)) | |
def test_match_and(self): | |
data = {'a': '1', 'b': '2'} | |
ast = parser.parse('a:1 AND b:2') | |
self.assertTrue(match(data, ast)) | |
def test_match_and_false(self): | |
data = {'a': '1', 'b': '3'} | |
ast = parser.parse('a:1 AND b:2') | |
self.assertFalse(match(data, ast)) | |
def test_match_or(self): | |
data = {'a': '1', 'b': '3'} | |
ast = parser.parse('a:1 OR b:2') | |
self.assertTrue(match(data, ast)) | |
def test_match_or_false(self): | |
data = {'a': '2', 'b': '3'} | |
ast = parser.parse('a:1 OR b:2') | |
self.assertFalse(match(data, ast)) | |
def test_match_parens_and_parens(self): | |
data = {'a': '2', 'b': '3', 'c': '4'} | |
ast = parser.parse('(a:2 AND b:3) AND (a:2 AND c:4)') | |
self.assertTrue(match(data, ast)) | |
def test_match_or_and_and(self): | |
data = {'a': '2', 'b': '3', 'c': '4'} | |
ast = parser.parse('(a:1 OR b:3) AND (a:2 AND c:4)') | |
self.assertTrue(match(data, ast)) | |
def test_match_or_and_and_false(self): | |
data = {'a': '2', 'b': '3', 'c': '4'} | |
ast = parser.parse('(a:1 OR b:4) AND (a:2 AND c:4)') | |
self.assertFalse(match(data, ast)) | |
def test_match_nested_or_and_and_or(self): | |
data = {'a': '2', 'b': '3', 'c': '4'} | |
ast = parser.parse('((a:1 OR b:3) AND (a:2 OR c:5)) AND ((a:1 OR b:3) AND c:4)') | |
self.assertTrue(match(data, ast)) | |
def test_match_nested_or_and_and_or_false(self): | |
data = {'a': '2', 'b': '3', 'c': '4'} | |
ast = parser.parse('((a:1 OR b:3) AND (a:2 OR c:5)) AND ((a:1 OR b:3) AND c:5)') | |
self.assertFalse(match(data, ast)) | |
if __name__ == '__main__': | |
unittest.main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment