Skip to content

Instantly share code, notes, and snippets.

@jakevdp
Last active December 8, 2016 04:58
Show Gist options
  • Save jakevdp/4afdc655e5b80bcc4e09618be5498681 to your computer and use it in GitHub Desktop.
Save jakevdp/4afdc655e5b80bcc4e09618be5498681 to your computer and use it in GitHub Desktop.
Testing Parsley vs Parsimonious
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Exploring Parsing\n",
"\n",
"Parsley vs Parsimonious"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Simple: Matching an Integer"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"INTEGER_TEST_CASES = {'12345': 12345,\n",
" '+100': 100,\n",
" '-4': -4}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Parsimonious"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"12345"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from parsimonious import NodeVisitor\n",
"from parsimonious.grammar import Grammar\n",
"\n",
"parsimonious_int_grammar = \"\"\"\n",
"integer = ~\"[+-]?[0-9]+\"\n",
"\"\"\"\n",
"\n",
"class IntEvaluator(NodeVisitor):\n",
" def __init__(self, grammar, ctx, strict=True):\n",
" self.grammar = Grammar(grammar)\n",
" self._ctx = ctx\n",
" self._strict = strict\n",
" \n",
" def visit_integer(self, node, children):\n",
" return int(node.text)\n",
" \n",
"\n",
"grammar = IntEvaluator(parsimonious_int_grammar, {})\n",
"grammar.parse('12345')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"checking '12345' ... ok\n",
"checking '-4' ... ok\n",
"checking '+100' ... ok\n"
]
}
],
"source": [
"def test_integer_parsimonious():\n",
" grammar = IntEvaluator(parsimonious_int_grammar, {})\n",
" for key, val in INTEGER_TEST_CASES.items():\n",
" print(\"checking {0!r:<15}\".format(key), end='')\n",
" assert grammar.parse(key) == val\n",
" print(\" ... ok\")\n",
" \n",
"test_integer_parsimonious()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Parsley"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"12345"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import parsley\n",
"\n",
"parsley_int_grammar = \"\"\"\n",
"integer = <(\"+\" | \"-\")? digit+>:d -> int(d)\n",
"\"\"\"\n",
"\n",
"grammar = parsley.makeGrammar(parsley_int_grammar, {})\n",
"grammar('12345').integer()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"checking '12345' ... ok\n",
"checking '-4' ... ok\n",
"checking '+100' ... ok\n"
]
}
],
"source": [
"def test_integer_parsley():\n",
" grammar = parsley.makeGrammar(parsley_int_grammar, {})\n",
" for key, val in INTEGER_TEST_CASES.items():\n",
" print(\"checking {0!r:<15}\".format(key), end='')\n",
" assert grammar(key).integer() == val\n",
" print(\" ... ok\")\n",
" \n",
"test_integer_parsley()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Simple arithmetic parser"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ARITHMETIC_TEST_CASES = {\n",
" '43': 43,\n",
" '-3': -3,\n",
" '+2': +2,\n",
" '1+2': ('add', 1, 2),\n",
" '1 + 2': ('add', 1, 2),\n",
" '1/-2': ('div', 1, -2),\n",
" '44 *5': ('mul', 44, 5),\n",
" '-564* 4': ('mul', -564, 4)\n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Parsimonious"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"('mul', 4, ('div', ('add', ('mul', ('sub', 32, 11), -44), 55), 12))"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"parsimonious_arith_grammar = r\"\"\"\n",
"add = (sub sp \"+\" sp add) / sub\n",
"\n",
"sub = (mul sp \"-\" sp sub) / mul\n",
"\n",
"mul = (div sp \"*\" sp mul) / div\n",
"\n",
"div = (primary sp \"/\" sp div) / primary\n",
"\n",
"primary = (\"(\" sp add sp \")\") / integer\n",
"\n",
"integer = ~\"[+-]?[0-9]+\"\n",
"\n",
"sp = \" \"*\n",
"\"\"\"\n",
"\n",
"\n",
"class SimpleArithEvaluator(NodeVisitor):\n",
" binary_expressions = ['add', 'sub', 'mul', 'div']\n",
" \n",
" def __init__(self, grammar, ctx, strict=True):\n",
" self.grammar = Grammar(grammar)\n",
" self._ctx = ctx\n",
" self._strict = strict\n",
" \n",
" def _visit_binary(self, node, children):\n",
" child = children[0]\n",
" if not hasattr(child, '__len__'):\n",
" return child\n",
" if len(child) == 3:\n",
" return child\n",
" elif len(child) == 5:\n",
" return (node.expr_name, child[0], child[4])\n",
" \n",
" visit_add = _visit_binary\n",
" visit_sub = _visit_binary\n",
" visit_mul = _visit_binary\n",
" visit_div = _visit_binary\n",
" \n",
" def visit_primary(self, node, children):\n",
" child = children[0]\n",
" if not hasattr(child, '__len__'):\n",
" return child\n",
" elif len(child) == 5:\n",
" return child[2]\n",
" \n",
" def visit_integer(self, node, children):\n",
" return int(node.text)\n",
" \n",
" def generic_visit(self, node, children):\n",
" return children\n",
" \n",
"grammar = SimpleArithEvaluator(parsimonious_arith_grammar, {})\n",
"grammar.parse('4 * ((32 - 11) * -44 + 55) / 12')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1000 loops, best of 3: 1.23 ms per loop\n"
]
}
],
"source": [
"%timeit grammar.parse('(4 * ((32 - 11) * -44 + 55) / 12) * (60 - 32 + 581)')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"checking '-3' ... ok\n",
"checking '+2' ... ok\n",
"checking '43' ... ok\n",
"checking '1 + 2' ... ok\n",
"checking '1/-2' ... ok\n",
"checking '44 *5' ... ok\n",
"checking '1+2' ... ok\n",
"checking '-564* 4' ... ok\n"
]
}
],
"source": [
"def test_arithmetic_parsimonious():\n",
" grammar = SimpleArithEvaluator(parsimonious_arith_grammar, {})\n",
" for key, val in ARITHMETIC_TEST_CASES.items():\n",
" print(\"checking {0!r:<15}\".format(key), end='')\n",
" assert grammar.parse(key) == val\n",
" print(\" ... ok\")\n",
" \n",
"test_arithmetic_parsimonious()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Parsley"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"('mul', 4, ('div', ('add', ('mul', ('sub', 32, 11), -44), 55), 12))"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"parsley_arith_grammar = \"\"\"\n",
"parse = add\n",
"\n",
"add = sub:left sp \"+\" sp add:right -> ('add', left, right)\n",
" | sub:child -> child\n",
"\n",
"sub = mul:left sp \"-\" sp sub:right -> ('sub', left, right)\n",
" | mul:child -> child\n",
"\n",
"mul = div:left sp \"*\" sp mul:right -> ('mul', left, right)\n",
" | div:child -> child\n",
"\n",
"div = primary:left sp \"/\" sp div:right -> ('div', left, right)\n",
" | primary:child -> child\n",
" \n",
"primary = \"(\" sp add:child sp \")\" -> child\n",
" | integer:child -> child\n",
"\n",
"sp = ' '*\n",
"integer = <(\"+\" | \"-\")?digit+>:d -> int(d)\n",
"\"\"\"\n",
"\n",
"grammar = parsley.makeGrammar(parsley_arith_grammar, {})\n",
"grammar('4 * ((32 - 11) * -44 + 55) / 12').parse()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100 loops, best of 3: 3.93 ms per loop\n"
]
}
],
"source": [
"%timeit grammar('(4 * ((32 - 11) * -44 + 55) / 12) * (60 - 32 + 581)').parse()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"checking '-3' ... ok\n",
"checking '+2' ... ok\n",
"checking '43' ... ok\n",
"checking '1 + 2' ... ok\n",
"checking '1/-2' ... ok\n",
"checking '44 *5' ... ok\n",
"checking '1+2' ... ok\n",
"checking '-564* 4' ... ok\n"
]
}
],
"source": [
"def test_arithmetic_parsley():\n",
" grammar = parsley.makeGrammar(parsley_arith_grammar, {})\n",
" for key, val in ARITHMETIC_TEST_CASES.items():\n",
" print(\"checking {0!r:<15}\".format(key), end='')\n",
" assert grammar(key).parse() == val\n",
" print(\" ... ok\")\n",
" \n",
"test_arithmetic_parsley()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"## Matching numbers\n",
"\n",
"Let's build a parser that matches numbers... int, binary, octal, hex, and floats"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"NUMBER_TEST_CASES = {\n",
" # decimal integers\n",
" '12345': 12345,\n",
" '+100': 100,\n",
" '-42': -42,\n",
" \n",
" # binary integers\n",
" '0b11': 0b11,\n",
" '-0b101': -0b101,\n",
" '0B01': 0b01,\n",
" '-0B001': -0b001,\n",
" \n",
" # octal integers\n",
" '012': 0o12,\n",
" '-0132': -0o132,\n",
" '0o12': 0o12,\n",
" '-0o132': -0o132,\n",
" '0O12': 0o12,\n",
" '-0O132': -0o132,\n",
" \n",
" # hexadecimal integers\n",
" '0x42': 0x42,\n",
" '-0xFA': -0xFA,\n",
" '0X0F0': 0x0F0,\n",
" '-0X5DEF': -0x5DEF,\n",
" '0x42': 0x42,\n",
" '-0xFA': -0xfA,\n",
" '0X0F0': 0x0f0,\n",
" '-0X5DEF': -0x5DeF,\n",
" \n",
" # floating point\n",
" '11.': 11.0,\n",
" '0E5': 0.0,\n",
" '1.1': 1.1,\n",
" '0.1': 0.1,\n",
" '+.1': 0.1,\n",
" '.11': 0.11,\n",
" '1E4': 1E4,\n",
" '1.2E-3': 1.2E-3,\n",
" '-1E-4': -1E-4,\n",
" '-1.2E-3': -1.2E-3,\n",
" '12e4': 12E4,\n",
" '1.22e-3': 1.22E-3,\n",
" '-12e-4': -12E-4,\n",
" '-1.22e-3': -1.22E-3,\n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Parsimonious"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"-11.0"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"parsimonious_number_grammar = r\"\"\"\n",
"number = hexadecimal / binary / octal / float / integer\n",
"\n",
"float = exponent_float / decimal_float\n",
"\n",
"exponent_float = (decimal_float / integer_part) (\"E\" / \"e\") integer_part\n",
"\n",
"decimal_float = ~r\"[+-]?([0-9]+[.][0-9]*|[0-9]*[.][0-9]+)\"\n",
"\n",
"# integer part of floats can start with zero\n",
"integer_part = ~r\"[+-]?[0-9]+\"\n",
"\n",
"hexadecimal = ~r\"[+-]?0[Xx][0-9A-Fa-f]+\"\n",
"\n",
"binary = ~r\"[+-]?0[Bb][0-1]+\"\n",
"\n",
"octal = ~r\"[+-]?0[Oo]?[0-7]+\"\n",
"\n",
"# integers cannot start with 0 as that indicates octal\n",
"integer = ~r\"[+-]?[1-9][0-9]*\"\n",
"\"\"\"\n",
"\n",
"class NumberEvaluator(NodeVisitor):\n",
" def __init__(self, grammar, ctx, strict=True):\n",
" self.grammar = Grammar(grammar)\n",
" self._ctx = ctx\n",
" self._strict = strict\n",
" \n",
" def visit_number(self, node, children):\n",
" return children[0]\n",
" \n",
" def visit_float(self, node, children):\n",
" return float(node.text)\n",
" \n",
" def visit_hexadecimal(self, node, children):\n",
" return int(node.text, 16)\n",
" \n",
" def visit_binary(self, node, children):\n",
" return int(node.text, 2)\n",
" \n",
" def visit_octal(self, node, children):\n",
" return int(node.text, 8)\n",
" \n",
" def visit_integer(self, node, children):\n",
" return int(node.text)\n",
" \n",
" def generic_visit(self, node, children):\n",
" return children\n",
" \n",
"\n",
"grammar = NumberEvaluator(parsimonious_number_grammar, {})\n",
"grammar.parse('-11.0')"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"checking '1.1' ... ok\n",
"checking '-0B001' ... ok\n",
"checking '0.1' ... ok\n",
"checking '1.2E-3' ... ok\n",
"checking '-0o132' ... ok\n",
"checking '0x42' ... ok\n",
"checking '-0xFA' ... ok\n",
"checking '0X0F0' ... ok\n",
"checking '1E4' ... ok\n",
"checking '-1.22e-3' ... ok\n",
"checking '-0b101' ... ok\n",
"checking '-12e-4' ... ok\n",
"checking '-0132' ... ok\n",
"checking '0O12' ... ok\n",
"checking '-1.2E-3' ... ok\n",
"checking '1.22e-3' ... ok\n",
"checking '.11' ... ok\n",
"checking '0B01' ... ok\n",
"checking '-0O132' ... ok\n",
"checking '12e4' ... ok\n",
"checking '12345' ... ok\n",
"checking '0b11' ... ok\n",
"checking '-0X5DEF' ... ok\n",
"checking '+.1' ... ok\n",
"checking '11.' ... ok\n",
"checking '0E5' ... ok\n",
"checking '-1E-4' ... ok\n",
"checking '+100' ... ok\n",
"checking '012' ... ok\n",
"checking '0o12' ... ok\n",
"checking '-42' ... ok\n"
]
}
],
"source": [
"def test_number_parsimonious():\n",
" grammar = NumberEvaluator(parsimonious_number_grammar, {})\n",
" for key, val in NUMBER_TEST_CASES.items():\n",
" print(\"checking {0!r:<15}\".format(key), end='')\n",
" assert grammar.parse(key) == val\n",
" print(\" ... ok\")\n",
" \n",
"test_number_parsimonious()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"### Parsley"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"-40000000.0"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"parsley_number_grammar = \"\"\"\n",
"number = hexadecimal | octal | binary | float | integer\n",
"\n",
"binary = <sign \"0\" (\"B\" | \"b\") digit0_1+>:d -> int(d, 2)\n",
"\n",
"octal = <sign \"0\" (\"O\" | \"o\")? digit0_7+>:d -> int(d, 8)\n",
"\n",
"hexadecimal = <sign \"0\" (\"X\" | \"x\") digit_hex+>:d -> int(d, 16)\n",
"\n",
"float = exponent_float | decimal_float\n",
"\n",
"exponent_float = <sign (decimal_float | digit+) (\"E\" | \"e\") sign digit+>:d -> float(d)\n",
"\n",
"decimal_float = <sign digit+ \".\" digit*>:d -> float(d)\n",
" | <sign \".\" digit+>:d -> float(d)\n",
"\n",
"# integers cannot start with 0 as that indicates octal\n",
"integer = <sign digit1_9 digit*>:d -> int(d)\n",
"\n",
"# digit components\n",
"sign = <(\"+\" | \"-\")?>:s -> s\n",
"digit = :x ?(x in '0123456789') -> x\n",
"digit1_9 = :x ?(x in '123456789') -> x\n",
"digit0_7 = :x ?(x in '01234567') -> x\n",
"digit0_1 = :x ?(x in '01') -> x\n",
"digit_hex = :x ?(x in '0123456789abcdefABCDEF') -> x\n",
"\"\"\"\n",
"\n",
"grammar = parsley.makeGrammar(parsley_number_grammar, {})\n",
"grammar('-400.0E5').number()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"checking '1.1' ... ok\n",
"checking '-0B001' ... ok\n",
"checking '0.1' ... ok\n",
"checking '1.2E-3' ... ok\n",
"checking '-0o132' ... ok\n",
"checking '0x42' ... ok\n",
"checking '-0xFA' ... ok\n",
"checking '0X0F0' ... ok\n",
"checking '1E4' ... ok\n",
"checking '-1.22e-3' ... ok\n",
"checking '-0b101' ... ok\n",
"checking '-12e-4' ... ok\n",
"checking '-0132' ... ok\n",
"checking '0O12' ... ok\n",
"checking '-1.2E-3' ... ok\n",
"checking '1.22e-3' ... ok\n",
"checking '.11' ... ok\n",
"checking '0B01' ... ok\n",
"checking '-0O132' ... ok\n",
"checking '12e4' ... ok\n",
"checking '12345' ... ok\n",
"checking '0b11' ... ok\n",
"checking '-0X5DEF' ... ok\n",
"checking '+.1' ... ok\n",
"checking '11.' ... ok\n",
"checking '0E5' ... ok\n",
"checking '-1E-4' ... ok\n",
"checking '+100' ... ok\n",
"checking '012' ... ok\n",
"checking '0o12' ... ok\n",
"checking '-42' ... ok\n"
]
}
],
"source": [
"def test_number_parsley():\n",
" grammar = parsley.makeGrammar(parsley_number_grammar, {})\n",
" for key, val in NUMBER_TEST_CASES.items():\n",
" print(\"checking {0!r:<15}\".format(key), end='')\n",
" assert grammar(key).number() == val\n",
" print(\" ... ok\")\n",
" \n",
"test_number_parsley()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Scratch space"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"parsimonious_literal_grammar = r\"\"\"\n",
"literal = string / float / integer\n",
"\n",
"string = ~r\"([\\'\\\"])([^\\1]*)\\1\"\n",
"\n",
"float = exponent_float / decimal_float\n",
"exponent_float = (decimal_float / (sign digits)) ~\"[Ee]\" sign digits\n",
"decimal_float = (sign digits \".\" digits) / (sign digits \".\") / (\".\" digits)\n",
"\n",
"integer = sign digits\n",
"\n",
"sign = ~\"[+-]?\"\n",
"digits = ~\"[0-9]+\"\n",
"\"\"\"\n",
"\n",
"class LiteralEvaluator(NodeVisitor):\n",
" def __init__(self, grammar, ctx, strict=True):\n",
" self.grammar = Grammar(grammar)\n",
" self._ctx = ctx\n",
" self._strict = strict\n",
" \n",
" def visit_string(self, node, children):\n",
" return node.match.groups()[1]\n",
" \n",
" def visit_float(self, node, children):\n",
" return float(node.full_text)\n",
" \n",
" def visit_integer(self, node, children):\n",
" return int(node.full_text)\n",
" \n",
" def visit_literal(self, node, children):\n",
" return children[0]\n",
" \n",
" def generic_visit(self, node, children):\n",
" return children\n",
"\n",
"grammar = LiteralEvaluator(parsimonious_literal_grammar, {})\n",
"grammar.parse('1.4E6')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"LITERAL_TEST_CASES = {'100': 100,\n",
" '2.5': 2.5,\n",
" '.5': 0.5,\n",
" '5.': 5.0,\n",
" '1E6': 1E6,\n",
" '1e6': 1e6,\n",
" '1.4E7': 1.4E7,\n",
" '\"hello there agent #7\"': 'hello there agent #7',\n",
" \"'hello 45.9/!'\": 'hello 45.9/!'}\n",
"\n",
"def test_literal_evaluator():\n",
" parser = LiteralEvaluator(parsimonious_literal_grammar, {})\n",
"\n",
" for key, val in LITERAL_TEST_CASES.items():\n",
" result = parser.parse(key)\n",
" assert type(result) == type(val)\n",
" assert result == val\n",
" \n",
"test_literal_evaluator()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"grammar = Grammar(r\"\"\"\n",
"expr\n",
"expr = binop / parens / func / identifier / sequence / literal\n",
"\n",
"parens = \"(\" space expr space \")\"\n",
"\n",
"\n",
"# Arithmetic Operators\n",
"\n",
"binop = muldiv / addsub\n",
"\n",
"addsub = add / sub\n",
"\n",
"muldiv = mul / div\n",
"\n",
"add = expr space \"+\" space expr\n",
"\n",
"sub = expr space \"-\" space expr\n",
"\n",
"mul = expr space \"*\" space expr\n",
"\n",
"div = expr space \"/\" space expr\n",
"\n",
"\n",
"# Functions and Identifiers\n",
"\n",
"func = identifier space \"(\" seq \")\"\n",
"\n",
"identifier = ~\"[_a-zA-Z][_a-zA-Z0-9]*\"\n",
"\n",
"\n",
"# Lists and mappings\n",
"\n",
"sequence = list / tuple / set / mapping\n",
"\n",
"list = \"[\" seq \"]\"\n",
"\n",
"tuple = \"(\" seq \")\"\n",
"\n",
"set = \"{\" seq \"}\"\n",
"\n",
"mapping = \"{\" mappingseq \"}\"\n",
"\n",
"seq = space (seqitem space \",\" space)* (seqitem space)?\n",
"\n",
"seqitem = expr\n",
"\n",
"mappingseq = space (mappingitem space \",\" space)* (mappingitem space)?\n",
"\n",
"mappingitem = identifier space \":\" space expr\n",
"\n",
"\n",
"# Literal objects\n",
"\n",
"literal = string / float / integer\n",
"\n",
"string = ~\"\\\"[^\\\"]*\\\"|\\'[^\\']*\\'\"\n",
"\n",
"number = float / integer\n",
"\n",
"float = (integer \".\" digits) / (integer \".\") / (\".\" digits) / (integer ~\"[Ee]\" integer)\n",
"\n",
"integer = \"-\"? digits\n",
"\n",
"\n",
"# Building blocks\n",
"\n",
"space = \" \"*\n",
"digits = ~\"[0-9]+\"\n",
"\"\"\")\n",
"\n",
"#g = grammar.parse('[foo( -2e-6, blah ), 5.0]')\n",
"g = grammar.parse('{hello: blah, foo: \"bar\"} + 4')\n",
"print(g.full_text)\n",
"print(g)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# precedence\n",
"\n",
"# comma/sequence\n",
"# inline conditional ? :\n",
"# logical or\n",
"# logical and\n",
"# equality/inequality/strict equality/strict inequality\n",
"# greater/less than (or equal to)\n",
"# bit-shifts << >> >>>\n",
"# addition/subtraction\n",
"# multiplication/division/mod/exponentiation\n",
"# binary/logical not; unary +/-\n",
"# function call\n",
"# member access via . or []\n",
"# parens"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"'11.' -> float\n",
"'1.1' -> float\n",
"'.11' -> float\n",
"'11' -> decimal\n",
"'0b11' -> binary\n",
"'011' -> octal\n",
"'0x11' -> hexadecimal\n",
"\n",
"str has \"\" or ''"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import parsley\n",
"numbers = parsley.makeGrammar(\"\"\"\n",
"number = float | integer\n",
"integer = <digit+>:d -> int(d)\n",
"float = \".\" <digit+>:B -> float('.' + B)\n",
" | <digit+>:A \".\" <digit*>:B -> float(A + '.' + B)\n",
" | <digit+>:val1 \".\" -> float(A + '.')\n",
"\"\"\", {})\n",
"\n",
"numbers('4.').number()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import parsley\n",
"\n",
"def calculate(start, pairs):\n",
" result = start\n",
" for op, value in pairs:\n",
" if op == '+':\n",
" result += value\n",
" elif op == '-':\n",
" result -= value\n",
" elif op == '*':\n",
" result *= value\n",
" elif op == '/':\n",
" result /= value\n",
" return result\n",
"\n",
"grammar = parsley.makeGrammar(\"\"\"\n",
"number = <digit+>:ds -> int(ds)\n",
"parens = '(' ws expr:e ws ')' -> e\n",
"value = number | parens\n",
"ws = ' '*\n",
"add = '+' ws expr2:n -> ('+', n)\n",
"sub = '-' ws expr2:n -> ('-', n)\n",
"mul = '*' ws value:n -> ('*', n)\n",
"div = '/' ws value:n -> ('/', n)\n",
"\n",
"addsub = ws (add | sub)\n",
"muldiv = ws (mul | div)\n",
"\n",
"expr = expr2:left addsub*:right -> calculate(left, right)\n",
"expr2 = value:left muldiv*:right -> calculate(left, right)\n",
"\"\"\", {\"calculate\": calculate})\n",
"\n",
"g = grammar(\"4 * (5 + 6) + 1\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"grammar??"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"grammar = parsley.makeGrammar(\"\"\"\n",
"integer = <digit+>:val -> int(val)\n",
"float = \".\" <digit+>:val2 -> float('.' + val2)\n",
" | <digit+>:val1 \".\" <digit*>:val2 -> float(val1 + '.' + val2)\n",
" | <digit+>:val1 \".\" -> float(val1)\n",
"number = (float | integer)\n",
"\n",
"identifier = <(letter | '_')>:first <(letter | digit | '_')*>:rest -> first + rest\n",
"ws = \" \"*\n",
"\n",
"parens = \"(\" ws value:v ws \")\" -> v\n",
"\n",
"func = identifier:funcname ws \"(\" ws value:arg ws \")\" -> (funcname, arg)\n",
"\n",
"value = func | parens | identifier | number\n",
"\"\"\", {})\n",
"\n",
"grammar('foo(x)').value()"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3.5",
"language": "",
"name": "python3.5"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
@jkmacc-LANL
Copy link

This is the Parsimonious documentation, at this point. Thanks for posting it:-)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment