Last active
July 31, 2024 23:46
-
-
Save Alwinfy/24afefd0999b7ae9b81faddba483aa99 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from warnings import catch_warnings, filterwarnings # this is always nice to see at the top of a file | |
class Primitive: | |
__slots__ = ["args"] | |
def __init__(self, *args): self.args = args | |
def __await__(self): return (yield self.args) # also a fun sight to see | |
async def eof(): return await Primitive("eof") | |
async def munch(): return await Primitive("munch") | |
async def choice(*args): return await Primitive("choice", args) | |
async def fail(): return await choice() # checkmate | |
async def always(ret=None): return ret | |
async def fmap(parse, fn): # normal programmers hate them! one secret trick to make any codebase unreadable | |
return fn(await parse) | |
def maybe(run): | |
return choice( | |
fmap(run, lambda v: (True, v)), | |
always((False, None)), | |
) | |
async def ret2nd(l, value, *r): # fully self-indulgent | |
await l | |
out = await value | |
for c in r: await c | |
return out | |
async def guard(parse, pred): | |
val = await parse | |
if not pred(val): await fail() | |
return val | |
async def gather(thunk): # run it until it falls over | |
gathered = [] | |
while True: | |
ok, val = await maybe(thunk()) | |
if not ok: break | |
gathered.append(val) | |
return gathered | |
def gatherPlus(thunk): # ...but run it at least once, please | |
return guard(gather(thunk), bool) | |
async def gatherBetween(thunk, inter): # The Comma Parser | |
ok, first = await maybe(thunk()) | |
if not ok: return [] | |
return [first] + (await gather(lambda: ret2nd(inter(), thunk()))) | |
async def expect(string): | |
for ch in string: | |
ex = await munch() | |
if ex != ch: await fail() | |
return string | |
def skipWs(): | |
return gather(lambda: guard(munch(), str.isspace)) | |
def string(): # horror | |
def char(): | |
return choice( | |
ret2nd( | |
expect("\\"), | |
choice( | |
expect("\""), | |
expect("\\"), | |
fmap(expect("b"), lambda _: "\b"), | |
fmap(expect("f"), lambda _: "\f"), | |
fmap(expect("n"), lambda _: "\n"), | |
fmap(expect("r"), lambda _: "\r"), | |
fmap(expect("t"), lambda _: "\t"), | |
) | |
), | |
guard(munch(), "\"".__ne__)) | |
return fmap(ret2nd(expect("\""), gather(char), expect("\"")), "".join) | |
async def num(): | |
neg, _ = await maybe(expect("-")) | |
value = int("".join(await gatherPlus(lambda: guard(munch(), lambda x: "0" <= x <= "9")))) | |
return -value if neg else value | |
def jsonArray(): # terror | |
return ret2nd( | |
expect("["), | |
gatherBetween( | |
json, | |
lambda: expect(",") | |
), | |
skipWs(), | |
expect("]") | |
) | |
def jsonObject(): | |
async def parseEnt(): | |
await skipWs() | |
key = await string() | |
await skipWs() | |
await expect(":") | |
return (key, await json()) | |
return fmap(ret2nd( | |
expect("{"), | |
gatherBetween( | |
parseEnt, | |
lambda: expect(",") | |
), | |
skipWs(), | |
expect("}") | |
), dict) | |
def json(): # parser combinator hell | |
return ret2nd( | |
skipWs(), | |
choice( | |
fmap(expect("true"), lambda _: True), | |
fmap(expect("false"), lambda _: False), | |
fmap(expect("null"), lambda _: None), | |
num(), | |
string(), | |
jsonArray(), | |
jsonObject() | |
), | |
skipWs() | |
) | |
# and now for the magic part: pulling a parser out my ass | |
def runParser(text, code): | |
amb = [] # proximal possibilities list for choice() | |
mark = 0 # position to backtrack to | |
ptr = 0 # current parse position | |
arg = None # facilitate coroutine calling | |
stack = [] # stack of choice nodes | |
with catch_warnings(): | |
# source(s): trust me | |
filterwarnings("ignore", "coroutine .* was never awaited") | |
while True: | |
try: | |
cmd = code.send(arg) | |
except StopIteration as si: | |
# handle return from coro | |
for a in amb: a.close() | |
if stack: | |
arg = si.value | |
code, amb, mark = stack.pop() | |
continue | |
else: return si.value, text[ptr:] | |
match cmd: | |
case ("eof",): | |
if ptr == len(text): | |
arg = None | |
continue | |
case ("munch",): | |
if ptr < len(text): | |
arg = text[ptr] | |
ptr += 1 | |
continue | |
case ("choice", choices): | |
if choices: | |
# handle branch | |
stack.append((code, amb, mark)) | |
mark = ptr | |
code, *amb = choices | |
arg = None | |
amb.reverse() | |
continue | |
# handle parse fail | |
code.close() | |
while not amb: | |
if stack: | |
code, amb, mark = stack.pop() | |
code.close() | |
else: return None | |
ptr = mark | |
code = amb.pop() | |
arg = None | |
print(runParser(r'["foo", null, {"bar": [3, "baz", false]}, true]', json())) | |
# prints: | |
# (['foo', None, {'bar': [3, 'baz', False]}, True], '') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
the trick is not to do something badly, but to do something extremely wrong very well. ^_^