Created
October 1, 2022 03:59
-
-
Save shanecandoit/094031ebe7543f291c2a3c2fd5340a54 to your computer and use it in GitHub Desktop.
Read some Python code, turn it into an AST, then into JSON, then ...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import inspect\n", | |
"import ast" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def fib(n):\n", | |
" if n < 2:\n", | |
" return 1\n", | |
" else:\n", | |
" return fib(n-1) + fib(n-2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"def fib(n):\n", | |
" if n < 2:\n", | |
" return 1\n", | |
" else:\n", | |
" return fib(n-1) + fib(n-2)\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"fib_src = inspect.getsource(fib)\n", | |
"print(fib_src)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Module(\n", | |
" body=[\n", | |
" FunctionDef(\n", | |
" name='fib',\n", | |
" args=arguments(\n", | |
" posonlyargs=[],\n", | |
" args=[\n", | |
" arg(arg='n')],\n", | |
" kwonlyargs=[],\n", | |
" kw_defaults=[],\n", | |
" defaults=[]),\n", | |
" body=[\n", | |
" If(\n", | |
" test=Compare(\n", | |
" left=Name(id='n', ctx=Load()),\n", | |
" ops=[\n", | |
" Lt()],\n", | |
" comparators=[\n", | |
" Constant(value=2)]),\n", | |
" body=[\n", | |
" Return(\n", | |
" value=Constant(value=1))],\n", | |
" orelse=[\n", | |
" Return(\n", | |
" value=BinOp(\n", | |
" left=Call(\n", | |
" func=Name(id='fib', ctx=Load()),\n", | |
" args=[\n", | |
" BinOp(\n", | |
" left=Name(id='n', ctx=Load()),\n", | |
" op=Sub(),\n", | |
" right=Constant(value=1))],\n", | |
" keywords=[]),\n", | |
" op=Add(),\n", | |
" right=Call(\n", | |
" func=Name(id='fib', ctx=Load()),\n", | |
" args=[\n", | |
" BinOp(\n", | |
" left=Name(id='n', ctx=Load()),\n", | |
" op=Sub(),\n", | |
" right=Constant(value=2))],\n", | |
" keywords=[])))])],\n", | |
" decorator_list=[])],\n", | |
" type_ignores=[])\n" | |
] | |
} | |
], | |
"source": [ | |
"fib_ast = ast.parse(fib_src)\n", | |
"ast_str = ast.dump(fib_ast, indent=2)\n", | |
"print(ast_str)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 73, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"? right=Constant(value=2))],\n", | |
"+ right=Constant{value:2})],\n", | |
"- nothing to do here\n" | |
] | |
} | |
], | |
"source": [ | |
"def extract(text):\n", | |
" first_close = text.find(')')\n", | |
" last_open = text[:first_close].find('(')\n", | |
" # print(first_close, last_open)\n", | |
" # 22 14\n", | |
" if first_close==-1 or last_open==-1:\n", | |
" return text\n", | |
" inner = text[last_open:first_close+1]\n", | |
" # (value=2) >> value=2\n", | |
" new_inner = ('{'+inner[1:-1]+'}').replace('=', ':')\n", | |
" # print(text)\n", | |
" # right=Constant(value=2))],\n", | |
" # print(inner)\n", | |
" # value=2\n", | |
" \n", | |
" # right=Constant{value:2})],\n", | |
" return text.replace(inner, new_inner)\n", | |
"\n", | |
"left='right=Constant(value=2))],'\n", | |
"\n", | |
"# pos\n", | |
"ext = extract(left)\n", | |
"print('?',left)\n", | |
"print('+',ext)\n", | |
"\n", | |
"# neg\n", | |
"r=extract('nothing to do here')\n", | |
"print('-',r)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 74, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Module:{\n", | |
" body:{\n", | |
" FunctionDef:{\n", | |
" name:\"fib\",\n", | |
" args:arguments:{\n", | |
" posonlyargs:{},\n", | |
" args:{\n", | |
" arg:{arg:\"n\"}},\n", | |
" kwonlyargs:{},\n", | |
" kw_defaults:{},\n", | |
" defaults:{}},\n", | |
" body:{\n", | |
" If:{\n", | |
" test:Compare:{\n", | |
" left:Name:{id:\"n\", ctx:Load:{}},\n", | |
" ops:{\n", | |
" Lt:{}},\n", | |
" comparators:{\n", | |
" Constant:{value:2}}},\n", | |
" body:{\n", | |
" Return:{\n", | |
" value:Constant:{value:1}}},\n", | |
" orelse:{\n", | |
" Return:{\n", | |
" value:BinOp:{\n", | |
" left:Call:{\n", | |
" func:Name:{id:\"fib\", ctx:Load:{}},\n", | |
" args:{\n", | |
" BinOp:{\n", | |
" left:Name:{id:\"n\", ctx:Load:{}},\n", | |
" op:Sub:{},\n", | |
" right:Constant:{value:1}}},\n", | |
" keywords:{}},\n", | |
" op:Add:{},\n", | |
" right:Call:{\n", | |
" func:Name:{id:\"fib\", ctx:Load:{}},\n", | |
" args:{\n", | |
" BinOp:{\n", | |
" left:Name:{id:\"n\", ctx:Load:{}},\n", | |
" op:Sub:{},\n", | |
" right:Constant:{value:2}}},\n", | |
" keywords:{}}}}}}},\n", | |
" decorator_list:{}}},\n", | |
" type_ignores:{}}\n" | |
] | |
} | |
], | |
"source": [ | |
"# turn the AST into json\n", | |
"import json\n", | |
"almost_json = ast_str\n", | |
"\n", | |
"# 'body=[' >> 'body:['\n", | |
"almost_json = almost_json.replace('=', ':')\n", | |
"\n", | |
"# 'body=[' >> 'body:{'\n", | |
"almost_json = almost_json.replace('[', '{').replace(']', '}')\n", | |
"\n", | |
"# 'FunctionDef(' >> 'FunctionDef:{'\n", | |
"almost_json = almost_json.replace('(', ':{').replace(')', '}')\n", | |
"\n", | |
"almost_json = almost_json.replace(\"'\", '\"')\n", | |
"\n", | |
"extracted = extract(almost_json)\n", | |
"while extracted != almost_json:\n", | |
" almost_json = extracted\n", | |
" extracted = extract(almost_json)\n", | |
"\n", | |
"print(almost_json)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 68, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "JSONDecodeError", | |
"evalue": "Expecting value: line 1 column 1 (char 0)", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)", | |
"Cell \u001b[0;32mIn [68], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m js_tree \u001b[38;5;241m=\u001b[39m json\u001b[38;5;241m.\u001b[39mloads(almost_json)\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(js_tree)\n", | |
"File \u001b[0;32m/usr/lib/python3.10/json/__init__.py:346\u001b[0m, in \u001b[0;36mloads\u001b[0;34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m 341\u001b[0m s \u001b[39m=\u001b[39m s\u001b[39m.\u001b[39mdecode(detect_encoding(s), \u001b[39m'\u001b[39m\u001b[39msurrogatepass\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 343\u001b[0m \u001b[39mif\u001b[39;00m (\u001b[39mcls\u001b[39m \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m object_hook \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m\n\u001b[1;32m 344\u001b[0m parse_int \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m parse_float \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m\n\u001b[1;32m 345\u001b[0m parse_constant \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m object_pairs_hook \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m kw):\n\u001b[0;32m--> 346\u001b[0m \u001b[39mreturn\u001b[39;00m _default_decoder\u001b[39m.\u001b[39;49mdecode(s)\n\u001b[1;32m 347\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mcls\u001b[39m \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 348\u001b[0m \u001b[39mcls\u001b[39m \u001b[39m=\u001b[39m JSONDecoder\n", | |
"File \u001b[0;32m/usr/lib/python3.10/json/decoder.py:337\u001b[0m, in \u001b[0;36mJSONDecoder.decode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m 332\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mdecode\u001b[39m(\u001b[39mself\u001b[39m, s, _w\u001b[39m=\u001b[39mWHITESPACE\u001b[39m.\u001b[39mmatch):\n\u001b[1;32m 333\u001b[0m \u001b[39m\"\"\"Return the Python representation of ``s`` (a ``str`` instance\u001b[39;00m\n\u001b[1;32m 334\u001b[0m \u001b[39m containing a JSON document).\u001b[39;00m\n\u001b[1;32m 335\u001b[0m \n\u001b[1;32m 336\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 337\u001b[0m obj, end \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mraw_decode(s, idx\u001b[39m=\u001b[39;49m_w(s, \u001b[39m0\u001b[39;49m)\u001b[39m.\u001b[39;49mend())\n\u001b[1;32m 338\u001b[0m end \u001b[39m=\u001b[39m _w(s, end)\u001b[39m.\u001b[39mend()\n\u001b[1;32m 339\u001b[0m \u001b[39mif\u001b[39;00m end \u001b[39m!=\u001b[39m \u001b[39mlen\u001b[39m(s):\n", | |
"File \u001b[0;32m/usr/lib/python3.10/json/decoder.py:355\u001b[0m, in \u001b[0;36mJSONDecoder.raw_decode\u001b[0;34m(self, s, idx)\u001b[0m\n\u001b[1;32m 353\u001b[0m obj, end \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mscan_once(s, idx)\n\u001b[1;32m 354\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n\u001b[0;32m--> 355\u001b[0m \u001b[39mraise\u001b[39;00m JSONDecodeError(\u001b[39m\"\u001b[39m\u001b[39mExpecting value\u001b[39m\u001b[39m\"\u001b[39m, s, err\u001b[39m.\u001b[39mvalue) \u001b[39mfrom\u001b[39;00m \u001b[39mNone\u001b[39m\n\u001b[1;32m 356\u001b[0m \u001b[39mreturn\u001b[39;00m obj, end\n", | |
"\u001b[0;31mJSONDecodeError\u001b[0m: Expecting value: line 1 column 1 (char 0)" | |
] | |
} | |
], | |
"source": [ | |
"js_tree = json.loads(almost_json)\n", | |
"print(js_tree)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 92, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"want right:{Constant:{value:2}})],\n", | |
" got right:Constant{value:2})],\n" | |
] | |
}, | |
{ | |
"ename": "AssertionError", | |
"evalue": "", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", | |
"Cell \u001b[0;32mIn [92], line 9\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mwant\u001b[39m\u001b[38;5;124m'\u001b[39m, want)\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m got\u001b[39m\u001b[38;5;124m'\u001b[39m, got)\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m got \u001b[38;5;241m==\u001b[39m want\n", | |
"\u001b[0;31mAssertionError\u001b[0m: " | |
] | |
} | |
], | |
"source": [ | |
"left='right=Constant(value=2))],'\n", | |
"want='right:{Constant:{value:2}})],'\n", | |
"def fix(s):\n", | |
" r = extract(s.replace('=', ':'))\n", | |
" return r\n", | |
"got = fix(left)\n", | |
"print('want', want)\n", | |
"print(' got', got)\n", | |
"assert got == want" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 76, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import re" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 88, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"want left_Name:{id:\"n\", ctx_Load:{}},\n", | |
" got left_Name:{id:\"n\", ctx_Load:{}},\n" | |
] | |
} | |
], | |
"source": [ | |
"def fix_word_colon_word(s):\n", | |
" return re.sub(r'(\\w+):(\\w+)', r'\\1_\\2', s)\n", | |
"\n", | |
"samp = 'left:Name:{id:\"n\", ctx:Load:{}},'\n", | |
"want = 'left_Name:{id:\"n\", ctx_Load:{}},'\n", | |
"got = fix_word_colon_word(samp)\n", | |
"print('want', want)\n", | |
"print(' got', got)\n", | |
"assert got == want" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 85, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"def remove_duds(s):\n", | |
" \"\"\" if we have 'word:{},' remove it \"\"\"\n", | |
" reg = re.compile(r'(\\w+):{}(,)?')\n", | |
" return reg.sub('', s)\n", | |
"\n", | |
"samp = 'posonlyargs:{},'\n", | |
"print(remove_duds(samp))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 86, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Module:{\n", | |
" body:{\n", | |
" FunctionDef:{\n", | |
" name:\"fib\",\n", | |
" args:arguments:{\n", | |
" posonlyargs:{},\n", | |
" args:{\n", | |
" arg:{arg:\"n\"}},\n", | |
" kwonlyargs:{},\n", | |
" kw_defaults:{},\n", | |
" defaults:{}},\n", | |
" body:{\n", | |
" If:{\n", | |
" test:Compare:{\n", | |
" left:Name:{id:\"n\", ctx:Loa\n", | |
"---\n", | |
"Module:{\n", | |
" body:{\n", | |
" FunctionDef:{\n", | |
" name:\"fib\",\n", | |
" args:arguments:{\n", | |
" \n", | |
" args:{\n", | |
" arg:{arg:\"n\"}},\n", | |
" \n", | |
" \n", | |
" },\n", | |
" body:{\n", | |
" If:{\n", | |
" test:Compare:{\n", | |
" left:Name:{id:\"n\", ctx:},\n", | |
" ops:{\n", | |
" },\n", | |
" comparat\n" | |
] | |
} | |
], | |
"source": [ | |
"print(almost_json[:300])\n", | |
"print('---')\n", | |
"print(remove_duds(almost_json)[:300])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3.10.4 64-bit", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.6" | |
}, | |
"orig_nbformat": 4, | |
"vscode": { | |
"interpreter": { | |
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Turn parens into objects, i.e. turn
right:Constant{value:2})]
into
right:{Constant:{value:2}}