Created
November 12, 2019 20:08
-
-
Save kognate/2b4e9812177600f0085aeb409f9efdf8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from lark import Lark" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Tree(start, [Token(WORD, 'Hello'), Token(STRING, '\"World dude\"'), Token(NUMBER, '-40')])\n" | |
] | |
} | |
], | |
"source": [ | |
"\n", | |
"l = Lark('''start: (WORD | NUMBER)+\",\" STRING NUMBER\"!\"\n", | |
" %import common.WORD // imports from terminal library\n", | |
" %import common.ESCAPED_STRING -> STRING\n", | |
" %import common.SIGNED_NUMBER -> NUMBER\n", | |
" %import common.WS\n", | |
" %ignore WS\n", | |
" ''')\n", | |
"\n", | |
"print( l.parse(\"Hello, \\\"World dude\\\" -40!\") )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Tree(start, [Token(NUMBER, '50'), Token(WORD, 'or'), Token(WORD, 'so'), Token(STRING, '\"World dude\"'), Token(NUMBER, '-40')])\n" | |
] | |
} | |
], | |
"source": [ | |
"print( l.parse(\"50 or so, \\\"World dude\\\" -40!\") )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Tree(start, [Token(WORD, 'Hello'), Token(NUMBER, '20'), Token(STRING, '\"World dude\"'), Token(NUMBER, '-40')])\n" | |
] | |
} | |
], | |
"source": [ | |
"print( l.parse(\"Hello 20, \\\"World dude\\\" -40!\") )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"l = Lark('''start: wonum\",\" estring only_numbers\"!\"\n", | |
" only_numbers: NUMBER\n", | |
" wonum: (WORD | NUMBER)+\n", | |
" estring: STRING\n", | |
" %import common.WORD // imports from terminal library\n", | |
" %import common.ESCAPED_STRING -> STRING\n", | |
" %import common.SIGNED_NUMBER -> NUMBER\n", | |
" %import common.WS\n", | |
" %ignore WS\n", | |
" ''')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from lark import Transformer\n", | |
"\n", | |
"class MyTransformer(Transformer):\n", | |
" def start(self, lvalues):\n", | |
" return \"|\".join(lvalues)\n", | |
" def estring(self, quoted_string):\n", | |
" return \" \".join([x[1:-1] for x in quoted_string])\n", | |
" def wonum(self, items):\n", | |
" return \" \".join(items)\n", | |
" def only_numbers(self, item):\n", | |
" return str(item[0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"input_string = \"\"\"Hello 30 there, \"some stuff\" 20!\"\"\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'Hello 30 there|some stuff|20'" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"MyTransformer().transform(l.parse(input_string))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# A More Complicated Example\n", | |
"\n", | |
"We get a string that looks like this:\n", | |
"\n", | |
"```TOKEN : NUMBER : quoted or unquoted string : SIGNED INT```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"l = Lark('''start: token \":\" NUMBER \":\" qornot \":\" NUMBER\n", | |
" token: tire | wheel | caster | disc | mechano |tirer\n", | |
" tire: \"TIRE\"\n", | |
" wheel: \"WHEEL\"\n", | |
" caster: \"CASTOR\"\n", | |
" disc: \"DISC\"\n", | |
" tirer: \"TIRER\"\n", | |
" mechano: \"MECHANO\"\n", | |
" qornot: WORD+ | STRING\n", | |
" %import common.WORD // imports from terminal library\n", | |
" %import common.ESCAPED_STRING -> STRING\n", | |
" %import common.SIGNED_NUMBER -> NUMBER\n", | |
" %import common.WS\n", | |
" %ignore WS\n", | |
" ''')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Tree(start, [Tree(token, [Tree(tire, [])]), Token(NUMBER, '30'), Tree(qornot, [Token(STRING, '\"hello there\"')]), Token(NUMBER, '10')])" | |
] | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"l.parse(\"\"\"TIRE : 30 : \"hello there\" : 10\"\"\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from pydantic import BaseModel\n", | |
"\n", | |
"class Order(BaseModel):\n", | |
" kind: str\n", | |
" description: str\n", | |
" have: int\n", | |
" need: int\n", | |
"\n", | |
"\n", | |
"class OrderTransformer(Transformer):\n", | |
" def start(self, items):\n", | |
" return Order(kind=items[0],\n", | |
" description=items[2],\n", | |
" have=items[1],\n", | |
" need=items[3])\n", | |
" def token(self, items):\n", | |
" return str(items[0].data)\n", | |
" def qornot(self, items):\n", | |
" return \" \".join(items)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Order(kind='tirer', description='\"this won\\'t work\"', have=30, need=-10)" | |
] | |
}, | |
"execution_count": 35, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"OrderTransformer().transform(l.parse(\"\"\"TIRER : 30 : \"this won't work\": -10\"\"\"))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.5rc1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
jupyterlab | |
lark-parser | |
pydantic |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment