Last active
December 8, 2016 04:58
-
-
Save jakevdp/4afdc655e5b80bcc4e09618be5498681 to your computer and use it in GitHub Desktop.
Testing Parsley vs Parsimonious
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
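{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Exploring Parsing\n", | |
"\n", | |
"A side-by-side comparison of two Python parsing libraries, Parsley and Parsimonious, applied to the same three tasks: matching an integer, parsing simple arithmetic, and matching numeric literals." | |
] | |
}, | |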
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Simple: Matching an Integer" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"INTEGER_TEST_CASES = {'12345': 12345,\n", | |
" '+100': 100,\n", | |
" '-4': -4}" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Parsimonious" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"12345" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from parsimonious import NodeVisitor\n", | |
"from parsimonious.grammar import Grammar\n", | |
"\n", | |
"parsimonious_int_grammar = \"\"\"\n", | |
"integer = ~\"[+-]?[0-9]+\"\n", | |
"\"\"\"\n", | |
"\n", | |
"class IntEvaluator(NodeVisitor):\n", | |
" def __init__(self, grammar, ctx, strict=True):\n", | |
" self.grammar = Grammar(grammar)\n", | |
" self._ctx = ctx\n", | |
" self._strict = strict\n", | |
" \n", | |
" def visit_integer(self, node, children):\n", | |
" return int(node.text)\n", | |
" \n", | |
"\n", | |
"grammar = IntEvaluator(parsimonious_int_grammar, {})\n", | |
"grammar.parse('12345')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"checking '12345' ... ok\n", | |
"checking '-4' ... ok\n", | |
"checking '+100' ... ok\n" | |
] | |
} | |
], | |
"source": [ | |
"def test_integer_parsimonious():\n", | |
" grammar = IntEvaluator(parsimonious_int_grammar, {})\n", | |
" for key, val in INTEGER_TEST_CASES.items():\n", | |
" print(\"checking {0!r:<15}\".format(key), end='')\n", | |
" assert grammar.parse(key) == val\n", | |
" print(\" ... ok\")\n", | |
" \n", | |
"test_integer_parsimonious()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Parsley" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"12345" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import parsley\n", | |
"\n", | |
"parsley_int_grammar = \"\"\"\n", | |
"integer = <(\"+\" | \"-\")? digit+>:d -> int(d)\n", | |
"\"\"\"\n", | |
"\n", | |
"grammar = parsley.makeGrammar(parsley_int_grammar, {})\n", | |
"grammar('12345').integer()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"checking '12345' ... ok\n", | |
"checking '-4' ... ok\n", | |
"checking '+100' ... ok\n" | |
] | |
} | |
], | |
"source": [ | |
"def test_integer_parsley():\n", | |
" grammar = parsley.makeGrammar(parsley_int_grammar, {})\n", | |
" for key, val in INTEGER_TEST_CASES.items():\n", | |
" print(\"checking {0!r:<15}\".format(key), end='')\n", | |
" assert grammar(key).integer() == val\n", | |
" print(\" ... ok\")\n", | |
" \n", | |
"test_integer_parsley()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Simple arithmetic parser" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"ARITHMETIC_TEST_CASES = {\n", | |
" '43': 43,\n", | |
" '-3': -3,\n", | |
" '+2': +2,\n", | |
" '1+2': ('add', 1, 2),\n", | |
" '1 + 2': ('add', 1, 2),\n", | |
" '1/-2': ('div', 1, -2),\n", | |
" '44 *5': ('mul', 44, 5),\n", | |
" '-564* 4': ('mul', -564, 4)\n", | |
"}" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Parsimonious" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"('mul', 4, ('div', ('add', ('mul', ('sub', 32, 11), -44), 55), 12))" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"parsimonious_arith_grammar = r\"\"\"\n", | |
"add = (sub sp \"+\" sp add) / sub\n", | |
"\n", | |
"sub = (mul sp \"-\" sp sub) / mul\n", | |
"\n", | |
"mul = (div sp \"*\" sp mul) / div\n", | |
"\n", | |
"div = (primary sp \"/\" sp div) / primary\n", | |
"\n", | |
"primary = (\"(\" sp add sp \")\") / integer\n", | |
"\n", | |
"integer = ~\"[+-]?[0-9]+\"\n", | |
"\n", | |
"sp = \" \"*\n", | |
"\"\"\"\n", | |
"\n", | |
"\n", | |
"class SimpleArithEvaluator(NodeVisitor):\n", | |
" binary_expressions = ['add', 'sub', 'mul', 'div']\n", | |
" \n", | |
" def __init__(self, grammar, ctx, strict=True):\n", | |
" self.grammar = Grammar(grammar)\n", | |
" self._ctx = ctx\n", | |
" self._strict = strict\n", | |
" \n", | |
" def _visit_binary(self, node, children):\n", | |
" child = children[0]\n", | |
" if not hasattr(child, '__len__'):\n", | |
" return child\n", | |
" if len(child) == 3:\n", | |
" return child\n", | |
" elif len(child) == 5:\n", | |
" return (node.expr_name, child[0], child[4])\n", | |
" \n", | |
" visit_add = _visit_binary\n", | |
" visit_sub = _visit_binary\n", | |
" visit_mul = _visit_binary\n", | |
" visit_div = _visit_binary\n", | |
" \n", | |
" def visit_primary(self, node, children):\n", | |
" child = children[0]\n", | |
" if not hasattr(child, '__len__'):\n", | |
" return child\n", | |
" elif len(child) == 5:\n", | |
" return child[2]\n", | |
" \n", | |
" def visit_integer(self, node, children):\n", | |
" return int(node.text)\n", | |
" \n", | |
" def generic_visit(self, node, children):\n", | |
" return children\n", | |
" \n", | |
"grammar = SimpleArithEvaluator(parsimonious_arith_grammar, {})\n", | |
"grammar.parse('4 * ((32 - 11) * -44 + 55) / 12')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1000 loops, best of 3: 1.23 ms per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit grammar.parse('(4 * ((32 - 11) * -44 + 55) / 12) * (60 - 32 + 581)')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"checking '-3' ... ok\n", | |
"checking '+2' ... ok\n", | |
"checking '43' ... ok\n", | |
"checking '1 + 2' ... ok\n", | |
"checking '1/-2' ... ok\n", | |
"checking '44 *5' ... ok\n", | |
"checking '1+2' ... ok\n", | |
"checking '-564* 4' ... ok\n" | |
] | |
} | |
], | |
"source": [ | |
"def test_arithmetic_parsimonious():\n", | |
" grammar = SimpleArithEvaluator(parsimonious_arith_grammar, {})\n", | |
" for key, val in ARITHMETIC_TEST_CASES.items():\n", | |
" print(\"checking {0!r:<15}\".format(key), end='')\n", | |
" assert grammar.parse(key) == val\n", | |
" print(\" ... ok\")\n", | |
" \n", | |
"test_arithmetic_parsimonious()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Parsley" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"('mul', 4, ('div', ('add', ('mul', ('sub', 32, 11), -44), 55), 12))" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"parsley_arith_grammar = \"\"\"\n", | |
"parse = add\n", | |
"\n", | |
"add = sub:left sp \"+\" sp add:right -> ('add', left, right)\n", | |
" | sub:child -> child\n", | |
"\n", | |
"sub = mul:left sp \"-\" sp sub:right -> ('sub', left, right)\n", | |
" | mul:child -> child\n", | |
"\n", | |
"mul = div:left sp \"*\" sp mul:right -> ('mul', left, right)\n", | |
" | div:child -> child\n", | |
"\n", | |
"div = primary:left sp \"/\" sp div:right -> ('div', left, right)\n", | |
" | primary:child -> child\n", | |
" \n", | |
"primary = \"(\" sp add:child sp \")\" -> child\n", | |
" | integer:child -> child\n", | |
"\n", | |
"sp = ' '*\n", | |
"integer = <(\"+\" | \"-\")?digit+>:d -> int(d)\n", | |
"\"\"\"\n", | |
"\n", | |
"grammar = parsley.makeGrammar(parsley_arith_grammar, {})\n", | |
"grammar('4 * ((32 - 11) * -44 + 55) / 12').parse()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"100 loops, best of 3: 3.93 ms per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit grammar('(4 * ((32 - 11) * -44 + 55) / 12) * (60 - 32 + 581)').parse()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"checking '-3' ... ok\n", | |
"checking '+2' ... ok\n", | |
"checking '43' ... ok\n", | |
"checking '1 + 2' ... ok\n", | |
"checking '1/-2' ... ok\n", | |
"checking '44 *5' ... ok\n", | |
"checking '1+2' ... ok\n", | |
"checking '-564* 4' ... ok\n" | |
] | |
} | |
], | |
"source": [ | |
"def test_arithmetic_parsley():\n", | |
" grammar = parsley.makeGrammar(parsley_arith_grammar, {})\n", | |
" for key, val in ARITHMETIC_TEST_CASES.items():\n", | |
" print(\"checking {0!r:<15}\".format(key), end='')\n", | |
" assert grammar(key).parse() == val\n", | |
" print(\" ... ok\")\n", | |
" \n", | |
"test_arithmetic_parsley()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"## Matching numbers\n", | |
"\n", | |
"Let's build a parser that matches numeric literals: decimal, binary, octal, and hexadecimal integers, as well as floats." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"NUMBER_TEST_CASES = {\n", | |
" # decimal integers\n", | |
" '12345': 12345,\n", | |
" '+100': 100,\n", | |
" '-42': -42,\n", | |
" \n", | |
" # binary integers\n", | |
" '0b11': 0b11,\n", | |
" '-0b101': -0b101,\n", | |
" '0B01': 0b01,\n", | |
" '-0B001': -0b001,\n", | |
" \n", | |
" # octal integers\n", | |
" '012': 0o12,\n", | |
" '-0132': -0o132,\n", | |
" '0o12': 0o12,\n", | |
" '-0o132': -0o132,\n", | |
" '0O12': 0o12,\n", | |
" '-0O132': -0o132,\n", | |
" \n", | |
" # hexadecimal integers\n", | |
" '0x42': 0x42,\n", | |
" '-0xFA': -0xFA,\n", | |
" '0X0F0': 0x0F0,\n", | |
" '-0X5DEF': -0x5DEF,\n", | |
" '0x42': 0x42,\n", | |
" '-0xFA': -0xfA,\n", | |
" '0X0F0': 0x0f0,\n", | |
" '-0X5DEF': -0x5DeF,\n", | |
" \n", | |
" # floating point\n", | |
" '11.': 11.0,\n", | |
" '0E5': 0.0,\n", | |
" '1.1': 1.1,\n", | |
" '0.1': 0.1,\n", | |
" '+.1': 0.1,\n", | |
" '.11': 0.11,\n", | |
" '1E4': 1E4,\n", | |
" '1.2E-3': 1.2E-3,\n", | |
" '-1E-4': -1E-4,\n", | |
" '-1.2E-3': -1.2E-3,\n", | |
" '12e4': 12E4,\n", | |
" '1.22e-3': 1.22E-3,\n", | |
" '-12e-4': -12E-4,\n", | |
" '-1.22e-3': -1.22E-3,\n", | |
"}" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Parsimonious" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"-11.0" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"parsimonious_number_grammar = r\"\"\"\n", | |
"number = hexadecimal / binary / octal / float / integer\n", | |
"\n", | |
"float = exponent_float / decimal_float\n", | |
"\n", | |
"exponent_float = (decimal_float / integer_part) (\"E\" / \"e\") integer_part\n", | |
"\n", | |
"decimal_float = ~r\"[+-]?([0-9]+[.][0-9]*|[0-9]*[.][0-9]+)\"\n", | |
"\n", | |
"# integer part of floats can start with zero\n", | |
"integer_part = ~r\"[+-]?[0-9]+\"\n", | |
"\n", | |
"hexadecimal = ~r\"[+-]?0[Xx][0-9A-Fa-f]+\"\n", | |
"\n", | |
"binary = ~r\"[+-]?0[Bb][0-1]+\"\n", | |
"\n", | |
"octal = ~r\"[+-]?0[Oo]?[0-7]+\"\n", | |
"\n", | |
"# integers cannot start with 0 as that indicates octal\n", | |
"integer = ~r\"[+-]?[1-9][0-9]*\"\n", | |
"\"\"\"\n", | |
"\n", | |
"class NumberEvaluator(NodeVisitor):\n", | |
" def __init__(self, grammar, ctx, strict=True):\n", | |
" self.grammar = Grammar(grammar)\n", | |
" self._ctx = ctx\n", | |
" self._strict = strict\n", | |
" \n", | |
" def visit_number(self, node, children):\n", | |
" return children[0]\n", | |
" \n", | |
" def visit_float(self, node, children):\n", | |
" return float(node.text)\n", | |
" \n", | |
" def visit_hexadecimal(self, node, children):\n", | |
" return int(node.text, 16)\n", | |
" \n", | |
" def visit_binary(self, node, children):\n", | |
" return int(node.text, 2)\n", | |
" \n", | |
" def visit_octal(self, node, children):\n", | |
" return int(node.text, 8)\n", | |
" \n", | |
" def visit_integer(self, node, children):\n", | |
" return int(node.text)\n", | |
" \n", | |
" def generic_visit(self, node, children):\n", | |
" return children\n", | |
" \n", | |
"\n", | |
"grammar = NumberEvaluator(parsimonious_number_grammar, {})\n", | |
"grammar.parse('-11.0')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"checking '1.1' ... ok\n", | |
"checking '-0B001' ... ok\n", | |
"checking '0.1' ... ok\n", | |
"checking '1.2E-3' ... ok\n", | |
"checking '-0o132' ... ok\n", | |
"checking '0x42' ... ok\n", | |
"checking '-0xFA' ... ok\n", | |
"checking '0X0F0' ... ok\n", | |
"checking '1E4' ... ok\n", | |
"checking '-1.22e-3' ... ok\n", | |
"checking '-0b101' ... ok\n", | |
"checking '-12e-4' ... ok\n", | |
"checking '-0132' ... ok\n", | |
"checking '0O12' ... ok\n", | |
"checking '-1.2E-3' ... ok\n", | |
"checking '1.22e-3' ... ok\n", | |
"checking '.11' ... ok\n", | |
"checking '0B01' ... ok\n", | |
"checking '-0O132' ... ok\n", | |
"checking '12e4' ... ok\n", | |
"checking '12345' ... ok\n", | |
"checking '0b11' ... ok\n", | |
"checking '-0X5DEF' ... ok\n", | |
"checking '+.1' ... ok\n", | |
"checking '11.' ... ok\n", | |
"checking '0E5' ... ok\n", | |
"checking '-1E-4' ... ok\n", | |
"checking '+100' ... ok\n", | |
"checking '012' ... ok\n", | |
"checking '0o12' ... ok\n", | |
"checking '-42' ... ok\n" | |
] | |
} | |
], | |
"source": [ | |
"def test_number_parsimonious():\n", | |
" grammar = NumberEvaluator(parsimonious_number_grammar, {})\n", | |
" for key, val in NUMBER_TEST_CASES.items():\n", | |
" print(\"checking {0!r:<15}\".format(key), end='')\n", | |
" assert grammar.parse(key) == val\n", | |
" print(\" ... ok\")\n", | |
" \n", | |
"test_number_parsimonious()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"### Parsley" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"-40000000.0" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"parsley_number_grammar = \"\"\"\n", | |
"number = hexadecimal | octal | binary | float | integer\n", | |
"\n", | |
"binary = <sign \"0\" (\"B\" | \"b\") digit0_1+>:d -> int(d, 2)\n", | |
"\n", | |
"octal = <sign \"0\" (\"O\" | \"o\")? digit0_7+>:d -> int(d, 8)\n", | |
"\n", | |
"hexadecimal = <sign \"0\" (\"X\" | \"x\") digit_hex+>:d -> int(d, 16)\n", | |
"\n", | |
"float = exponent_float | decimal_float\n", | |
"\n", | |
"exponent_float = <sign (decimal_float | digit+) (\"E\" | \"e\") sign digit+>:d -> float(d)\n", | |
"\n", | |
"decimal_float = <sign digit+ \".\" digit*>:d -> float(d)\n", | |
" | <sign \".\" digit+>:d -> float(d)\n", | |
"\n", | |
"# integers cannot start with 0 as that indicates octal\n", | |
"integer = <sign digit1_9 digit*>:d -> int(d)\n", | |
"\n", | |
"# digit components\n", | |
"sign = <(\"+\" | \"-\")?>:s -> s\n", | |
"digit = :x ?(x in '0123456789') -> x\n", | |
"digit1_9 = :x ?(x in '123456789') -> x\n", | |
"digit0_7 = :x ?(x in '01234567') -> x\n", | |
"digit0_1 = :x ?(x in '01') -> x\n", | |
"digit_hex = :x ?(x in '0123456789abcdefABCDEF') -> x\n", | |
"\"\"\"\n", | |
"\n", | |
"grammar = parsley.makeGrammar(parsley_number_grammar, {})\n", | |
"grammar('-400.0E5').number()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"checking '1.1' ... ok\n", | |
"checking '-0B001' ... ok\n", | |
"checking '0.1' ... ok\n", | |
"checking '1.2E-3' ... ok\n", | |
"checking '-0o132' ... ok\n", | |
"checking '0x42' ... ok\n", | |
"checking '-0xFA' ... ok\n", | |
"checking '0X0F0' ... ok\n", | |
"checking '1E4' ... ok\n", | |
"checking '-1.22e-3' ... ok\n", | |
"checking '-0b101' ... ok\n", | |
"checking '-12e-4' ... ok\n", | |
"checking '-0132' ... ok\n", | |
"checking '0O12' ... ok\n", | |
"checking '-1.2E-3' ... ok\n", | |
"checking '1.22e-3' ... ok\n", | |
"checking '.11' ... ok\n", | |
"checking '0B01' ... ok\n", | |
"checking '-0O132' ... ok\n", | |
"checking '12e4' ... ok\n", | |
"checking '12345' ... ok\n", | |
"checking '0b11' ... ok\n", | |
"checking '-0X5DEF' ... ok\n", | |
"checking '+.1' ... ok\n", | |
"checking '11.' ... ok\n", | |
"checking '0E5' ... ok\n", | |
"checking '-1E-4' ... ok\n", | |
"checking '+100' ... ok\n", | |
"checking '012' ... ok\n", | |
"checking '0o12' ... ok\n", | |
"checking '-42' ... ok\n" | |
] | |
} | |
], | |
"source": [ | |
"def test_number_parsley():\n", | |
" grammar = parsley.makeGrammar(parsley_number_grammar, {})\n", | |
" for key, val in NUMBER_TEST_CASES.items():\n", | |
" print(\"checking {0!r:<15}\".format(key), end='')\n", | |
" assert grammar(key).number() == val\n", | |
" print(\" ... ok\")\n", | |
" \n", | |
"test_number_parsley()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Scratch space" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"parsimonious_literal_grammar = r\"\"\"\n", | |
"literal = string / float / integer\n", | |
"\n", | |
"string = ~r\"([\\'\\\"])([^\\1]*)\\1\"\n", | |
"\n", | |
"float = exponent_float / decimal_float\n", | |
"exponent_float = (decimal_float / (sign digits)) ~\"[Ee]\" sign digits\n", | |
"decimal_float = (sign digits \".\" digits) / (sign digits \".\") / (\".\" digits)\n", | |
"\n", | |
"integer = sign digits\n", | |
"\n", | |
"sign = ~\"[+-]?\"\n", | |
"digits = ~\"[0-9]+\"\n", | |
"\"\"\"\n", | |
"\n", | |
"class LiteralEvaluator(NodeVisitor):\n", | |
" def __init__(self, grammar, ctx, strict=True):\n", | |
" self.grammar = Grammar(grammar)\n", | |
" self._ctx = ctx\n", | |
" self._strict = strict\n", | |
" \n", | |
" def visit_string(self, node, children):\n", | |
" return node.match.groups()[1]\n", | |
" \n", | |
" def visit_float(self, node, children):\n", | |
" return float(node.full_text)\n", | |
" \n", | |
" def visit_integer(self, node, children):\n", | |
" return int(node.full_text)\n", | |
" \n", | |
" def visit_literal(self, node, children):\n", | |
" return children[0]\n", | |
" \n", | |
" def generic_visit(self, node, children):\n", | |
" return children\n", | |
"\n", | |
"grammar = LiteralEvaluator(parsimonious_literal_grammar, {})\n", | |
"grammar.parse('1.4E6')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"LITERAL_TEST_CASES = {'100': 100,\n", | |
" '2.5': 2.5,\n", | |
" '.5': 0.5,\n", | |
" '5.': 5.0,\n", | |
" '1E6': 1E6,\n", | |
" '1e6': 1e6,\n", | |
" '1.4E7': 1.4E7,\n", | |
" '\"hello there agent #7\"': 'hello there agent #7',\n", | |
" \"'hello 45.9/!'\": 'hello 45.9/!'}\n", | |
"\n", | |
"def test_literal_evaluator():\n", | |
" parser = LiteralEvaluator(parsimonious_literal_grammar, {})\n", | |
"\n", | |
" for key, val in LITERAL_TEST_CASES.items():\n", | |
" result = parser.parse(key)\n", | |
" assert type(result) == type(val)\n", | |
" assert result == val\n", | |
" \n", | |
"test_literal_evaluator()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"grammar = Grammar(r\"\"\"\n", | |
"expr\n", | |
"expr = binop / parens / func / identifier / sequence / literal\n", | |
"\n", | |
"parens = \"(\" space expr space \")\"\n", | |
"\n", | |
"\n", | |
"# Arithmetic Operators\n", | |
"\n", | |
"binop = muldiv / addsub\n", | |
"\n", | |
"addsub = add / sub\n", | |
"\n", | |
"muldiv = mul / div\n", | |
"\n", | |
"add = expr space \"+\" space expr\n", | |
"\n", | |
"sub = expr space \"-\" space expr\n", | |
"\n", | |
"mul = expr space \"*\" space expr\n", | |
"\n", | |
"div = expr space \"/\" space expr\n", | |
"\n", | |
"\n", | |
"# Functions and Identifiers\n", | |
"\n", | |
"func = identifier space \"(\" seq \")\"\n", | |
"\n", | |
"identifier = ~\"[_a-zA-Z][_a-zA-Z0-9]*\"\n", | |
"\n", | |
"\n", | |
"# Lists and mappings\n", | |
"\n", | |
"sequence = list / tuple / set / mapping\n", | |
"\n", | |
"list = \"[\" seq \"]\"\n", | |
"\n", | |
"tuple = \"(\" seq \")\"\n", | |
"\n", | |
"set = \"{\" seq \"}\"\n", | |
"\n", | |
"mapping = \"{\" mappingseq \"}\"\n", | |
"\n", | |
"seq = space (seqitem space \",\" space)* (seqitem space)?\n", | |
"\n", | |
"seqitem = expr\n", | |
"\n", | |
"mappingseq = space (mappingitem space \",\" space)* (mappingitem space)?\n", | |
"\n", | |
"mappingitem = identifier space \":\" space expr\n", | |
"\n", | |
"\n", | |
"# Literal objects\n", | |
"\n", | |
"literal = string / float / integer\n", | |
"\n", | |
"string = ~\"\\\"[^\\\"]*\\\"|\\'[^\\']*\\'\"\n", | |
"\n", | |
"number = float / integer\n", | |
"\n", | |
"float = (integer \".\" digits) / (integer \".\") / (\".\" digits) / (integer ~\"[Ee]\" integer)\n", | |
"\n", | |
"integer = \"-\"? digits\n", | |
"\n", | |
"\n", | |
"# Building blocks\n", | |
"\n", | |
"space = \" \"*\n", | |
"digits = ~\"[0-9]+\"\n", | |
"\"\"\")\n", | |
"\n", | |
"#g = grammar.parse('[foo( -2e-6, blah ), 5.0]')\n", | |
"g = grammar.parse('{hello: blah, foo: \"bar\"} + 4')\n", | |
"print(g.full_text)\n", | |
"print(g)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# operator precedence, listed from lowest (binds loosest) to highest (binds tightest)\n", | |
"\n", | |
"# comma/sequence\n", | |
"# inline conditional ? :\n", | |
"# logical or\n", | |
"# logical and\n", | |
"# equality/inequality/strict equality/strict inequality\n", | |
"# greater/less than (or equal to)\n", | |
"# bit-shifts << >> >>>\n", | |
"# addition/subtraction\n", | |
"# multiplication/division/mod/exponentiation\n", | |
"# bitwise/logical not; unary +/-\n", | |
"# function call\n", | |
"# member access via . or []\n", | |
"# parens" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"- `'11.'` -> float\n", | |
"- `'1.1'` -> float\n", | |
"- `'.11'` -> float\n", | |
"- `'11'` -> decimal\n", | |
"- `'0b11'` -> binary\n", | |
"- `'011'` -> octal\n", | |
"- `'0x11'` -> hexadecimal\n", | |
"\n", | |
"A string literal is delimited by either double quotes (`\"\"`) or single quotes (`''`)." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import parsley\n", | |
"numbers = parsley.makeGrammar(\"\"\"\n", | |
"number = float | integer\n", | |
"integer = <digit+>:d -> int(d)\n", | |
"float = \".\" <digit+>:B -> float('.' + B)\n", | |
" | <digit+>:A \".\" <digit*>:B -> float(A + '.' + B)\n", | |
" | <digit+>:val1 \".\" -> float(A + '.')\n", | |
"\"\"\", {})\n", | |
"\n", | |
"numbers('4.').number()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import parsley\n", | |
"\n", | |
"def calculate(start, pairs):\n", | |
" result = start\n", | |
" for op, value in pairs:\n", | |
" if op == '+':\n", | |
" result += value\n", | |
" elif op == '-':\n", | |
" result -= value\n", | |
" elif op == '*':\n", | |
" result *= value\n", | |
" elif op == '/':\n", | |
" result /= value\n", | |
" return result\n", | |
"\n", | |
"grammar = parsley.makeGrammar(\"\"\"\n", | |
"number = <digit+>:ds -> int(ds)\n", | |
"parens = '(' ws expr:e ws ')' -> e\n", | |
"value = number | parens\n", | |
"ws = ' '*\n", | |
"add = '+' ws expr2:n -> ('+', n)\n", | |
"sub = '-' ws expr2:n -> ('-', n)\n", | |
"mul = '*' ws value:n -> ('*', n)\n", | |
"div = '/' ws value:n -> ('/', n)\n", | |
"\n", | |
"addsub = ws (add | sub)\n", | |
"muldiv = ws (mul | div)\n", | |
"\n", | |
"expr = expr2:left addsub*:right -> calculate(left, right)\n", | |
"expr2 = value:left muldiv*:right -> calculate(left, right)\n", | |
"\"\"\", {\"calculate\": calculate})\n", | |
"\n", | |
"g = grammar(\"4 * (5 + 6) + 1\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"grammar??" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"grammar = parsley.makeGrammar(\"\"\"\n", | |
"integer = <digit+>:val -> int(val)\n", | |
"float = \".\" <digit+>:val2 -> float('.' + val2)\n", | |
" | <digit+>:val1 \".\" <digit*>:val2 -> float(val1 + '.' + val2)\n", | |
" | <digit+>:val1 \".\" -> float(val1)\n", | |
"number = (float | integer)\n", | |
"\n", | |
"identifier = <(letter | '_')>:first <(letter | digit | '_')*>:rest -> first + rest\n", | |
"ws = \" \"*\n", | |
"\n", | |
"parens = \"(\" ws value:v ws \")\" -> v\n", | |
"\n", | |
"func = identifier:funcname ws \"(\" ws value:arg ws \")\" -> (funcname, arg)\n", | |
"\n", | |
"value = func | parens | identifier | number\n", | |
"\"\"\", {})\n", | |
"\n", | |
"grammar('foo(x)').value()" | |
] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"kernelspec": { | |
"display_name": "Python 3.5", | |
"language": "", | |
"name": "python3.5" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is the Parsimonious documentation, at this point. Thanks for posting it:-)