Last active
December 8, 2016 04:58
-
-
Save jakevdp/4afdc655e5b80bcc4e09618be5498681 to your computer and use it in GitHub Desktop.
Testing Parsley vs Parsimonious
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Exploring Parsing\n", | |
| "\n", | |
| "Parsley vs Parsimonious" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Simple: Matching an Integer" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "INTEGER_TEST_CASES = {'12345': 12345,\n", | |
| " '+100': 100,\n", | |
| " '-4': -4}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Parsimonious" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "12345" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "from parsimonious import NodeVisitor\n", | |
| "from parsimonious.grammar import Grammar\n", | |
| "\n", | |
| "parsimonious_int_grammar = \"\"\"\n", | |
| "integer = ~\"[+-]?[0-9]+\"\n", | |
| "\"\"\"\n", | |
| "\n", | |
| "class IntEvaluator(NodeVisitor):\n", | |
| " def __init__(self, grammar, ctx, strict=True):\n", | |
| " self.grammar = Grammar(grammar)\n", | |
| " self._ctx = ctx\n", | |
| " self._strict = strict\n", | |
| " \n", | |
| " def visit_integer(self, node, children):\n", | |
| " return int(node.text)\n", | |
| " \n", | |
| "\n", | |
| "grammar = IntEvaluator(parsimonious_int_grammar, {})\n", | |
| "grammar.parse('12345')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "checking '12345' ... ok\n", | |
| "checking '-4' ... ok\n", | |
| "checking '+100' ... ok\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "def test_integer_parsimonious():\n", | |
| " grammar = IntEvaluator(parsimonious_int_grammar, {})\n", | |
| " for key, val in INTEGER_TEST_CASES.items():\n", | |
| " print(\"checking {0!r:<15}\".format(key), end='')\n", | |
| " assert grammar.parse(key) == val\n", | |
| " print(\" ... ok\")\n", | |
| " \n", | |
| "test_integer_parsimonious()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Parsley" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "12345" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "import parsley\n", | |
| "\n", | |
| "parsley_int_grammar = \"\"\"\n", | |
| "integer = <(\"+\" | \"-\")? digit+>:d -> int(d)\n", | |
| "\"\"\"\n", | |
| "\n", | |
| "grammar = parsley.makeGrammar(parsley_int_grammar, {})\n", | |
| "grammar('12345').integer()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "checking '12345' ... ok\n", | |
| "checking '-4' ... ok\n", | |
| "checking '+100' ... ok\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "def test_integer_parsley():\n", | |
| " grammar = parsley.makeGrammar(parsley_int_grammar, {})\n", | |
| " for key, val in INTEGER_TEST_CASES.items():\n", | |
| " print(\"checking {0!r:<15}\".format(key), end='')\n", | |
| " assert grammar(key).integer() == val\n", | |
| " print(\" ... ok\")\n", | |
| " \n", | |
| "test_integer_parsley()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Simple arithmetic parser" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "ARITHMETIC_TEST_CASES = {\n", | |
| " '43': 43,\n", | |
| " '-3': -3,\n", | |
| " '+2': +2,\n", | |
| " '1+2': ('add', 1, 2),\n", | |
| " '1 + 2': ('add', 1, 2),\n", | |
| " '1/-2': ('div', 1, -2),\n", | |
| " '44 *5': ('mul', 44, 5),\n", | |
| " '-564* 4': ('mul', -564, 4)\n", | |
| "}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Parsimonious" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "('mul', 4, ('div', ('add', ('mul', ('sub', 32, 11), -44), 55), 12))" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "parsimonious_arith_grammar = r\"\"\"\n", | |
| "add = (sub sp \"+\" sp add) / sub\n", | |
| "\n", | |
| "sub = (mul sp \"-\" sp sub) / mul\n", | |
| "\n", | |
| "mul = (div sp \"*\" sp mul) / div\n", | |
| "\n", | |
| "div = (primary sp \"/\" sp div) / primary\n", | |
| "\n", | |
| "primary = (\"(\" sp add sp \")\") / integer\n", | |
| "\n", | |
| "integer = ~\"[+-]?[0-9]+\"\n", | |
| "\n", | |
| "sp = \" \"*\n", | |
| "\"\"\"\n", | |
| "\n", | |
| "\n", | |
| "class SimpleArithEvaluator(NodeVisitor):\n", | |
| " binary_expressions = ['add', 'sub', 'mul', 'div']\n", | |
| " \n", | |
| " def __init__(self, grammar, ctx, strict=True):\n", | |
| " self.grammar = Grammar(grammar)\n", | |
| " self._ctx = ctx\n", | |
| " self._strict = strict\n", | |
| " \n", | |
| " def _visit_binary(self, node, children):\n", | |
| " child = children[0]\n", | |
| " if not hasattr(child, '__len__'):\n", | |
| " return child\n", | |
| " if len(child) == 3:\n", | |
| " return child\n", | |
| " elif len(child) == 5:\n", | |
| " return (node.expr_name, child[0], child[4])\n", | |
| " \n", | |
| " visit_add = _visit_binary\n", | |
| " visit_sub = _visit_binary\n", | |
| " visit_mul = _visit_binary\n", | |
| " visit_div = _visit_binary\n", | |
| " \n", | |
| " def visit_primary(self, node, children):\n", | |
| " child = children[0]\n", | |
| " if not hasattr(child, '__len__'):\n", | |
| " return child\n", | |
| " elif len(child) == 5:\n", | |
| " return child[2]\n", | |
| " \n", | |
| " def visit_integer(self, node, children):\n", | |
| " return int(node.text)\n", | |
| " \n", | |
| " def generic_visit(self, node, children):\n", | |
| " return children\n", | |
| " \n", | |
| "grammar = SimpleArithEvaluator(parsimonious_arith_grammar, {})\n", | |
| "grammar.parse('4 * ((32 - 11) * -44 + 55) / 12')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "1000 loops, best of 3: 1.23 ms per loop\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%timeit grammar.parse('(4 * ((32 - 11) * -44 + 55) / 12) * (60 - 32 + 581)')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "checking '-3' ... ok\n", | |
| "checking '+2' ... ok\n", | |
| "checking '43' ... ok\n", | |
| "checking '1 + 2' ... ok\n", | |
| "checking '1/-2' ... ok\n", | |
| "checking '44 *5' ... ok\n", | |
| "checking '1+2' ... ok\n", | |
| "checking '-564* 4' ... ok\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "def test_arithmetic_parsimonious():\n", | |
| " grammar = SimpleArithEvaluator(parsimonious_arith_grammar, {})\n", | |
| " for key, val in ARITHMETIC_TEST_CASES.items():\n", | |
| " print(\"checking {0!r:<15}\".format(key), end='')\n", | |
| " assert grammar.parse(key) == val\n", | |
| " print(\" ... ok\")\n", | |
| " \n", | |
| "test_arithmetic_parsimonious()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Parsley" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "('mul', 4, ('div', ('add', ('mul', ('sub', 32, 11), -44), 55), 12))" | |
| ] | |
| }, | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "parsley_arith_grammar = \"\"\"\n", | |
| "parse = add\n", | |
| "\n", | |
| "add = sub:left sp \"+\" sp add:right -> ('add', left, right)\n", | |
| " | sub:child -> child\n", | |
| "\n", | |
| "sub = mul:left sp \"-\" sp sub:right -> ('sub', left, right)\n", | |
| " | mul:child -> child\n", | |
| "\n", | |
| "mul = div:left sp \"*\" sp mul:right -> ('mul', left, right)\n", | |
| " | div:child -> child\n", | |
| "\n", | |
| "div = primary:left sp \"/\" sp div:right -> ('div', left, right)\n", | |
| " | primary:child -> child\n", | |
| " \n", | |
| "primary = \"(\" sp add:child sp \")\" -> child\n", | |
| " | integer:child -> child\n", | |
| "\n", | |
| "sp = ' '*\n", | |
| "integer = <(\"+\" | \"-\")?digit+>:d -> int(d)\n", | |
| "\"\"\"\n", | |
| "\n", | |
| "grammar = parsley.makeGrammar(parsley_arith_grammar, {})\n", | |
| "grammar('4 * ((32 - 11) * -44 + 55) / 12').parse()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "100 loops, best of 3: 3.93 ms per loop\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%timeit grammar('(4 * ((32 - 11) * -44 + 55) / 12) * (60 - 32 + 581)').parse()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "checking '-3' ... ok\n", | |
| "checking '+2' ... ok\n", | |
| "checking '43' ... ok\n", | |
| "checking '1 + 2' ... ok\n", | |
| "checking '1/-2' ... ok\n", | |
| "checking '44 *5' ... ok\n", | |
| "checking '1+2' ... ok\n", | |
| "checking '-564* 4' ... ok\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "def test_arithmetic_parsley():\n", | |
| " grammar = parsley.makeGrammar(parsley_arith_grammar, {})\n", | |
| " for key, val in ARITHMETIC_TEST_CASES.items():\n", | |
| " print(\"checking {0!r:<15}\".format(key), end='')\n", | |
| " assert grammar(key).parse() == val\n", | |
| " print(\" ... ok\")\n", | |
| " \n", | |
| "test_arithmetic_parsley()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "source": [ | |
| "## Matching numbers\n", | |
| "\n", | |
| "Let's build a parser that matches numbers: decimal, binary, octal, and hexadecimal integers, as well as floats." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "NUMBER_TEST_CASES = {\n", | |
| " # decimal integers\n", | |
| " '12345': 12345,\n", | |
| " '+100': 100,\n", | |
| " '-42': -42,\n", | |
| " \n", | |
| " # binary integers\n", | |
| " '0b11': 0b11,\n", | |
| " '-0b101': -0b101,\n", | |
| " '0B01': 0b01,\n", | |
| " '-0B001': -0b001,\n", | |
| " \n", | |
| " # octal integers\n", | |
| " '012': 0o12,\n", | |
| " '-0132': -0o132,\n", | |
| " '0o12': 0o12,\n", | |
| " '-0o132': -0o132,\n", | |
| " '0O12': 0o12,\n", | |
| " '-0O132': -0o132,\n", | |
| " \n", | |
| " # hexadecimal integers\n", | |
| " '0x42': 0x42,\n", | |
| " '-0xFA': -0xFA,\n", | |
| " '0X0F0': 0x0F0,\n", | |
| " '-0X5DEF': -0x5DEF,\n", | |
| " '0x42': 0x42,\n", | |
| " '-0xFA': -0xfA,\n", | |
| " '0X0F0': 0x0f0,\n", | |
| " '-0X5DEF': -0x5DeF,\n", | |
| " \n", | |
| " # floating point\n", | |
| " '11.': 11.0,\n", | |
| " '0E5': 0.0,\n", | |
| " '1.1': 1.1,\n", | |
| " '0.1': 0.1,\n", | |
| " '+.1': 0.1,\n", | |
| " '.11': 0.11,\n", | |
| " '1E4': 1E4,\n", | |
| " '1.2E-3': 1.2E-3,\n", | |
| " '-1E-4': -1E-4,\n", | |
| " '-1.2E-3': -1.2E-3,\n", | |
| " '12e4': 12E4,\n", | |
| " '1.22e-3': 1.22E-3,\n", | |
| " '-12e-4': -12E-4,\n", | |
| " '-1.22e-3': -1.22E-3,\n", | |
| "}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Parsimonious" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "-11.0" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "parsimonious_number_grammar = r\"\"\"\n", | |
| "number = hexadecimal / binary / octal / float / integer\n", | |
| "\n", | |
| "float = exponent_float / decimal_float\n", | |
| "\n", | |
| "exponent_float = (decimal_float / integer_part) (\"E\" / \"e\") integer_part\n", | |
| "\n", | |
| "decimal_float = ~r\"[+-]?([0-9]+[.][0-9]*|[0-9]*[.][0-9]+)\"\n", | |
| "\n", | |
| "# integer part of floats can start with zero\n", | |
| "integer_part = ~r\"[+-]?[0-9]+\"\n", | |
| "\n", | |
| "hexadecimal = ~r\"[+-]?0[Xx][0-9A-Fa-f]+\"\n", | |
| "\n", | |
| "binary = ~r\"[+-]?0[Bb][0-1]+\"\n", | |
| "\n", | |
| "octal = ~r\"[+-]?0[Oo]?[0-7]+\"\n", | |
| "\n", | |
| "# integers cannot start with 0 as that indicates octal\n", | |
| "integer = ~r\"[+-]?[1-9][0-9]*\"\n", | |
| "\"\"\"\n", | |
| "\n", | |
| "class NumberEvaluator(NodeVisitor):\n", | |
| " def __init__(self, grammar, ctx, strict=True):\n", | |
| " self.grammar = Grammar(grammar)\n", | |
| " self._ctx = ctx\n", | |
| " self._strict = strict\n", | |
| " \n", | |
| " def visit_number(self, node, children):\n", | |
| " return children[0]\n", | |
| " \n", | |
| " def visit_float(self, node, children):\n", | |
| " return float(node.text)\n", | |
| " \n", | |
| " def visit_hexadecimal(self, node, children):\n", | |
| " return int(node.text, 16)\n", | |
| " \n", | |
| " def visit_binary(self, node, children):\n", | |
| " return int(node.text, 2)\n", | |
| " \n", | |
| " def visit_octal(self, node, children):\n", | |
| " return int(node.text, 8)\n", | |
| " \n", | |
| " def visit_integer(self, node, children):\n", | |
| " return int(node.text)\n", | |
| " \n", | |
| " def generic_visit(self, node, children):\n", | |
| " return children\n", | |
| " \n", | |
| "\n", | |
| "grammar = NumberEvaluator(parsimonious_number_grammar, {})\n", | |
| "grammar.parse('-11.0')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "checking '1.1' ... ok\n", | |
| "checking '-0B001' ... ok\n", | |
| "checking '0.1' ... ok\n", | |
| "checking '1.2E-3' ... ok\n", | |
| "checking '-0o132' ... ok\n", | |
| "checking '0x42' ... ok\n", | |
| "checking '-0xFA' ... ok\n", | |
| "checking '0X0F0' ... ok\n", | |
| "checking '1E4' ... ok\n", | |
| "checking '-1.22e-3' ... ok\n", | |
| "checking '-0b101' ... ok\n", | |
| "checking '-12e-4' ... ok\n", | |
| "checking '-0132' ... ok\n", | |
| "checking '0O12' ... ok\n", | |
| "checking '-1.2E-3' ... ok\n", | |
| "checking '1.22e-3' ... ok\n", | |
| "checking '.11' ... ok\n", | |
| "checking '0B01' ... ok\n", | |
| "checking '-0O132' ... ok\n", | |
| "checking '12e4' ... ok\n", | |
| "checking '12345' ... ok\n", | |
| "checking '0b11' ... ok\n", | |
| "checking '-0X5DEF' ... ok\n", | |
| "checking '+.1' ... ok\n", | |
| "checking '11.' ... ok\n", | |
| "checking '0E5' ... ok\n", | |
| "checking '-1E-4' ... ok\n", | |
| "checking '+100' ... ok\n", | |
| "checking '012' ... ok\n", | |
| "checking '0o12' ... ok\n", | |
| "checking '-42' ... ok\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "def test_number_parsimonious():\n", | |
| " grammar = NumberEvaluator(parsimonious_number_grammar, {})\n", | |
| " for key, val in NUMBER_TEST_CASES.items():\n", | |
| " print(\"checking {0!r:<15}\".format(key), end='')\n", | |
| " assert grammar.parse(key) == val\n", | |
| " print(\" ... ok\")\n", | |
| " \n", | |
| "test_number_parsimonious()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "source": [ | |
| "### Parsley" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "-40000000.0" | |
| ] | |
| }, | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "parsley_number_grammar = \"\"\"\n", | |
| "number = hexadecimal | octal | binary | float | integer\n", | |
| "\n", | |
| "binary = <sign \"0\" (\"B\" | \"b\") digit0_1+>:d -> int(d, 2)\n", | |
| "\n", | |
| "octal = <sign \"0\" (\"O\" | \"o\")? digit0_7+>:d -> int(d, 8)\n", | |
| "\n", | |
| "hexadecimal = <sign \"0\" (\"X\" | \"x\") digit_hex+>:d -> int(d, 16)\n", | |
| "\n", | |
| "float = exponent_float | decimal_float\n", | |
| "\n", | |
| "exponent_float = <sign (decimal_float | digit+) (\"E\" | \"e\") sign digit+>:d -> float(d)\n", | |
| "\n", | |
| "decimal_float = <sign digit+ \".\" digit*>:d -> float(d)\n", | |
| " | <sign \".\" digit+>:d -> float(d)\n", | |
| "\n", | |
| "# integers cannot start with 0 as that indicates octal\n", | |
| "integer = <sign digit1_9 digit*>:d -> int(d)\n", | |
| "\n", | |
| "# digit components\n", | |
| "sign = <(\"+\" | \"-\")?>:s -> s\n", | |
| "digit = :x ?(x in '0123456789') -> x\n", | |
| "digit1_9 = :x ?(x in '123456789') -> x\n", | |
| "digit0_7 = :x ?(x in '01234567') -> x\n", | |
| "digit0_1 = :x ?(x in '01') -> x\n", | |
| "digit_hex = :x ?(x in '0123456789abcdefABCDEF') -> x\n", | |
| "\"\"\"\n", | |
| "\n", | |
| "grammar = parsley.makeGrammar(parsley_number_grammar, {})\n", | |
| "grammar('-400.0E5').number()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "checking '1.1' ... ok\n", | |
| "checking '-0B001' ... ok\n", | |
| "checking '0.1' ... ok\n", | |
| "checking '1.2E-3' ... ok\n", | |
| "checking '-0o132' ... ok\n", | |
| "checking '0x42' ... ok\n", | |
| "checking '-0xFA' ... ok\n", | |
| "checking '0X0F0' ... ok\n", | |
| "checking '1E4' ... ok\n", | |
| "checking '-1.22e-3' ... ok\n", | |
| "checking '-0b101' ... ok\n", | |
| "checking '-12e-4' ... ok\n", | |
| "checking '-0132' ... ok\n", | |
| "checking '0O12' ... ok\n", | |
| "checking '-1.2E-3' ... ok\n", | |
| "checking '1.22e-3' ... ok\n", | |
| "checking '.11' ... ok\n", | |
| "checking '0B01' ... ok\n", | |
| "checking '-0O132' ... ok\n", | |
| "checking '12e4' ... ok\n", | |
| "checking '12345' ... ok\n", | |
| "checking '0b11' ... ok\n", | |
| "checking '-0X5DEF' ... ok\n", | |
| "checking '+.1' ... ok\n", | |
| "checking '11.' ... ok\n", | |
| "checking '0E5' ... ok\n", | |
| "checking '-1E-4' ... ok\n", | |
| "checking '+100' ... ok\n", | |
| "checking '012' ... ok\n", | |
| "checking '0o12' ... ok\n", | |
| "checking '-42' ... ok\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "def test_number_parsley():\n", | |
| " grammar = parsley.makeGrammar(parsley_number_grammar, {})\n", | |
| " for key, val in NUMBER_TEST_CASES.items():\n", | |
| " print(\"checking {0!r:<15}\".format(key), end='')\n", | |
| " assert grammar(key).number() == val\n", | |
| " print(\" ... ok\")\n", | |
| " \n", | |
| "test_number_parsley()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Scratch space" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "parsimonious_literal_grammar = r\"\"\"\n", | |
| "literal = string / float / integer\n", | |
| "\n", | |
| "string = ~r\"([\\'\\\"])([^\\1]*)\\1\"\n", | |
| "\n", | |
| "float = exponent_float / decimal_float\n", | |
| "exponent_float = (decimal_float / (sign digits)) ~\"[Ee]\" sign digits\n", | |
| "decimal_float = (sign digits \".\" digits) / (sign digits \".\") / (\".\" digits)\n", | |
| "\n", | |
| "integer = sign digits\n", | |
| "\n", | |
| "sign = ~\"[+-]?\"\n", | |
| "digits = ~\"[0-9]+\"\n", | |
| "\"\"\"\n", | |
| "\n", | |
| "class LiteralEvaluator(NodeVisitor):\n", | |
| " def __init__(self, grammar, ctx, strict=True):\n", | |
| " self.grammar = Grammar(grammar)\n", | |
| " self._ctx = ctx\n", | |
| " self._strict = strict\n", | |
| " \n", | |
| " def visit_string(self, node, children):\n", | |
| " return node.match.groups()[1]\n", | |
| " \n", | |
| " def visit_float(self, node, children):\n", | |
| " return float(node.full_text)\n", | |
| " \n", | |
| " def visit_integer(self, node, children):\n", | |
| " return int(node.full_text)\n", | |
| " \n", | |
| " def visit_literal(self, node, children):\n", | |
| " return children[0]\n", | |
| " \n", | |
| " def generic_visit(self, node, children):\n", | |
| " return children\n", | |
| "\n", | |
| "grammar = LiteralEvaluator(parsimonious_literal_grammar, {})\n", | |
| "grammar.parse('1.4E6')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "LITERAL_TEST_CASES = {'100': 100,\n", | |
| " '2.5': 2.5,\n", | |
| " '.5': 0.5,\n", | |
| " '5.': 5.0,\n", | |
| " '1E6': 1E6,\n", | |
| " '1e6': 1e6,\n", | |
| " '1.4E7': 1.4E7,\n", | |
| " '\"hello there agent #7\"': 'hello there agent #7',\n", | |
| " \"'hello 45.9/!'\": 'hello 45.9/!'}\n", | |
| "\n", | |
| "def test_literal_evaluator():\n", | |
| " parser = LiteralEvaluator(parsimonious_literal_grammar, {})\n", | |
| "\n", | |
| " for key, val in LITERAL_TEST_CASES.items():\n", | |
| " result = parser.parse(key)\n", | |
| " assert type(result) == type(val)\n", | |
| " assert result == val\n", | |
| " \n", | |
| "test_literal_evaluator()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "grammar = Grammar(r\"\"\"\n", | |
| "expr\n", | |
| "expr = binop / parens / func / identifier / sequence / literal\n", | |
| "\n", | |
| "parens = \"(\" space expr space \")\"\n", | |
| "\n", | |
| "\n", | |
| "# Arithmetic Operators\n", | |
| "\n", | |
| "binop = muldiv / addsub\n", | |
| "\n", | |
| "addsub = add / sub\n", | |
| "\n", | |
| "muldiv = mul / div\n", | |
| "\n", | |
| "add = expr space \"+\" space expr\n", | |
| "\n", | |
| "sub = expr space \"-\" space expr\n", | |
| "\n", | |
| "mul = expr space \"*\" space expr\n", | |
| "\n", | |
| "div = expr space \"/\" space expr\n", | |
| "\n", | |
| "\n", | |
| "# Functions and Identifiers\n", | |
| "\n", | |
| "func = identifier space \"(\" seq \")\"\n", | |
| "\n", | |
| "identifier = ~\"[_a-zA-Z][_a-zA-Z0-9]*\"\n", | |
| "\n", | |
| "\n", | |
| "# Lists and mappings\n", | |
| "\n", | |
| "sequence = list / tuple / set / mapping\n", | |
| "\n", | |
| "list = \"[\" seq \"]\"\n", | |
| "\n", | |
| "tuple = \"(\" seq \")\"\n", | |
| "\n", | |
| "set = \"{\" seq \"}\"\n", | |
| "\n", | |
| "mapping = \"{\" mappingseq \"}\"\n", | |
| "\n", | |
| "seq = space (seqitem space \",\" space)* (seqitem space)?\n", | |
| "\n", | |
| "seqitem = expr\n", | |
| "\n", | |
| "mappingseq = space (mappingitem space \",\" space)* (mappingitem space)?\n", | |
| "\n", | |
| "mappingitem = identifier space \":\" space expr\n", | |
| "\n", | |
| "\n", | |
| "# Literal objects\n", | |
| "\n", | |
| "literal = string / float / integer\n", | |
| "\n", | |
| "string = ~\"\\\"[^\\\"]*\\\"|\\'[^\\']*\\'\"\n", | |
| "\n", | |
| "number = float / integer\n", | |
| "\n", | |
| "float = (integer \".\" digits) / (integer \".\") / (\".\" digits) / (integer ~\"[Ee]\" integer)\n", | |
| "\n", | |
| "integer = \"-\"? digits\n", | |
| "\n", | |
| "\n", | |
| "# Building blocks\n", | |
| "\n", | |
| "space = \" \"*\n", | |
| "digits = ~\"[0-9]+\"\n", | |
| "\"\"\")\n", | |
| "\n", | |
| "#g = grammar.parse('[foo( -2e-6, blah ), 5.0]')\n", | |
| "g = grammar.parse('{hello: blah, foo: \"bar\"} + 4')\n", | |
| "print(g.full_text)\n", | |
| "print(g)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# precedence\n", | |
| "\n", | |
| "# comma/sequence\n", | |
| "# inline conditional ? :\n", | |
| "# logical or\n", | |
| "# logical and\n", | |
| "# equality/inequality/strict equality/strict inequality\n", | |
| "# greater/less than (or equal to)\n", | |
| "# bit-shifts << >> >>>\n", | |
| "# addition/subtraction\n", | |
| "# multiplication/division/mod/exponentiation\n", | |
| "# binary/logical not; unary +/-\n", | |
| "# function call\n", | |
| "# member access via . or []\n", | |
| "# parens" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "'11.' -> float\n", | |
| "'1.1' -> float\n", | |
| "'.11' -> float\n", | |
| "'11' -> decimal\n", | |
| "'0b11' -> binary\n", | |
| "'011' -> octal\n", | |
| "'0x11' -> hexadecimal\n", | |
| "\n", | |
| "str has \"\" or ''" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import parsley\n", | |
| "numbers = parsley.makeGrammar(\"\"\"\n", | |
| "number = float | integer\n", | |
| "integer = <digit+>:d -> int(d)\n", | |
| "float = \".\" <digit+>:B -> float('.' + B)\n", | |
| " | <digit+>:A \".\" <digit*>:B -> float(A + '.' + B)\n", | |
| " | <digit+>:val1 \".\" -> float(A + '.')\n", | |
| "\"\"\", {})\n", | |
| "\n", | |
| "numbers('4.').number()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import parsley\n", | |
| "\n", | |
| "def calculate(start, pairs):\n", | |
| " result = start\n", | |
| " for op, value in pairs:\n", | |
| " if op == '+':\n", | |
| " result += value\n", | |
| " elif op == '-':\n", | |
| " result -= value\n", | |
| " elif op == '*':\n", | |
| " result *= value\n", | |
| " elif op == '/':\n", | |
| " result /= value\n", | |
| " return result\n", | |
| "\n", | |
| "grammar = parsley.makeGrammar(\"\"\"\n", | |
| "number = <digit+>:ds -> int(ds)\n", | |
| "parens = '(' ws expr:e ws ')' -> e\n", | |
| "value = number | parens\n", | |
| "ws = ' '*\n", | |
| "add = '+' ws expr2:n -> ('+', n)\n", | |
| "sub = '-' ws expr2:n -> ('-', n)\n", | |
| "mul = '*' ws value:n -> ('*', n)\n", | |
| "div = '/' ws value:n -> ('/', n)\n", | |
| "\n", | |
| "addsub = ws (add | sub)\n", | |
| "muldiv = ws (mul | div)\n", | |
| "\n", | |
| "expr = expr2:left addsub*:right -> calculate(left, right)\n", | |
| "expr2 = value:left muldiv*:right -> calculate(left, right)\n", | |
| "\"\"\", {\"calculate\": calculate})\n", | |
| "\n", | |
| "g = grammar(\"4 * (5 + 6) + 1\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "grammar??" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "grammar = parsley.makeGrammar(\"\"\"\n", | |
| "integer = <digit+>:val -> int(val)\n", | |
| "float = \".\" <digit+>:val2 -> float('.' + val2)\n", | |
| " | <digit+>:val1 \".\" <digit*>:val2 -> float(val1 + '.' + val2)\n", | |
| " | <digit+>:val1 \".\" -> float(val1)\n", | |
| "number = (float | integer)\n", | |
| "\n", | |
| "identifier = <(letter | '_')>:first <(letter | digit | '_')*>:rest -> first + rest\n", | |
| "ws = \" \"*\n", | |
| "\n", | |
| "parens = \"(\" ws value:v ws \")\" -> v\n", | |
| "\n", | |
| "func = identifier:funcname ws \"(\" ws value:arg ws \")\" -> (funcname, arg)\n", | |
| "\n", | |
| "value = func | parens | identifier | number\n", | |
| "\"\"\", {})\n", | |
| "\n", | |
| "grammar('foo(x)').value()" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "anaconda-cloud": {}, | |
| "kernelspec": { | |
| "display_name": "Python 3.5", | |
| "language": "", | |
| "name": "python3.5" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.1" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is the Parsimonious documentation, at this point. Thanks for posting it:-)