Last active
July 8, 2021 07:34
-
-
Save tangentstorm/6ecf311f2c87bc0b3bb228e113cb703c to your computer and use it in GitHub Desktop.
Rough port of my grammar combinator thing to k3.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/ grammar combinators in K | |
/ for longer description (in python), see: | |
/ http://tangentstorm.github.io/draft/wejalboot.py.html | |
/ -- misc helper functions ------------------------------------ | |
join:{[sep;strs] / join strs with 'sep' as delimiter | |
(#sep) _ ,/ sep,' strs} | |
split:{[sep;str] / split str on sep character | |
s:sep,str; 1 _' (&s=sep) _ s} | |
/ -- combinator constructors ---------------------------------- | |
/ (for our purposes, a combinator is just a function that takes | |
/ some arguments and converts them into a dict with an extra `KIND key) | |
T:{[sym; doc; args]; | |
/ This macro creates a new function, which in turn creates a dict. | |
/ The fields of the tuple are passed in the 'args' parameter. | |
/ The `KIND entry in the dictionary will be the given `sym[bol] | |
/ Additionally, a variable whose name matches `sym will be bound to the function in the current scope. | |
a2s:{1 _ ,/ " `",/: split[";";x]} / arg string to symbol string: a;b -> `a `b | |
src: "{[", args, "] / ",doc, "\n" / source code for a function | |
src:src, " .+((`KIND ", (a2s args), ")\n" / returning a dict with these keys | |
src:src, " (`", ($sym),";", args, "))}" / and these values | |
.[sym; (); :; .(src)]} / compile, assign, and return the function | |
TI:{[sym;doc] T[sym;doc;"item"]} / define a tuple with a single 'item' parameter | |
TL:{[sym;doc] T[sym;doc;"list"]} / define a tuple with a single 'list' parameter | |
/ -- cursor data type ----------------------------------------- | |
T[`CURSOR; "cursor for a sequence"; "seq;pos;val"]; | |
cursor:{CURSOR[x; 0; *x]} | |
fwd:{[cur] / create a new cursor, 1 step ahead in the sequence | |
newpos: cur.pos+1; ch: :[newpos<#cur.seq; cur.seq@newpos; _ci 0] | |
CURSOR[cur.seq; newpos; ch]} | |
/ -- (somewhat :/) generic dispatch system -------------------- | |
/ the `self` here probably needs to be a k namespace path... not sure yet. | |
/ these should all be in a private in a namespace somewhere... | |
find_handler:{[mr; prefix; node] | |
want: `$ join["_"; (prefix; $node.KIND)] / `prefix_KIND | |
res: . want; if[ _n ~ res; res: unhandled] / if no such function, use 'unhandled' instead | |
res} | |
/ not quite so generic because the arguments are specific to this parser thing. | |
dispatch:{[mr; prefix; node; cur; env] | |
h: find_handler[self; prefix; node] | |
h[self; node; cur; env]} | |
unhandled:{[mr; node; cur; env] | |
`0: "!! UNHANDLED NODE KIND: ", ($node.KIND), "\n" | |
M[ `FAIL; cur; env]} | |
/ data types for match state / match results | |
T[`Match; "result type for matches"; "txt;pos"]; | |
T[`M; "Internal match state"; "val;cur;env"]; | |
matched: {[m] ~m.val~`FAIL} | |
do_match:{[mr; node; cur; env] / internal match helper, returns an M | |
dispatch[mr; "match"; node; cur; env]} | |
match:{[mr; node; str] / returns `FAIL or a Match | |
do_match[mr; node; cursor[str]; .()][`val]} | |
mjoin:{[mr; matches] / join a bunch of Match values, or fail if no matches | |
if[0=#matches; :`FAIL] | |
Match[,/(matches .' `txt); matches[0].pos]} | |
/-- grammar combinators, and match handlers ------------------- | |
TI[`Emp; "Empty pattern. Always matches, consumes nothing."]; | |
Emp: Emp[] / singleton | |
match_Emp:{[mr; node; cur; env] | |
:M[Match[""; cur.pos]; cur; env]} | |
TL[`Not; "Fail if the pattern matches. Consumes nothing."]; | |
/ TODO: match_Not | |
TL[`Any; "Match anything. (Same as Not[Emp])"]; | |
/ TODO: match_Any | |
TI[`Lit; "Match a single, specified item."]; | |
match_Lit:{[mr; node; cur; env] | |
:[cur.val ~ node.item | |
:M[Match[cur.val; cur.pos]; fwd[cur]; env] | |
:M[`FAIL; cur.pos; env]]} | |
TL[`Alt; "Match any of the given alternatives."]; | |
match_Alt:{[mr; node; cur; env] | |
res:M[`FAIL; cur; env] | |
i:0; nodes: node.list | |
while[(i<#nodes) & (res.val~`FAIL) | |
res: do_match[mr; nodes[i]; cur; env] | |
if[matched[res]; :res] | |
i+:1] | |
res} | |
TL[`Seq; "Match a sequence of patterns."]; | |
match_Seq:{[mr; node; cur; env] | |
i:0; nodes:node.list; cur0:cur; matches:() | |
while[i<#nodes | |
m: do_match[mr; nodes[i]; cur; env] | |
if[~matched[m]; :M[`FAIL; cur0; env]] | |
matches,: m.val; cur:m.cur; env: m.env | |
i+:1] | |
M[mjoin[mr;matches]; cur; env]} | |
TI[`Rep; "Match 1 or more repetitions."]; | |
match_Rep:{[mr; node; cur; env] | |
matches:(); m.val:`start | |
while[~m.val~`FAIL | |
m: do_match[mr; node.item; cur; env] | |
if[matched[m]; matches,:m.val; cur:m.cur; env:m.env]] | |
if[m.val=`start; :M[`FAIL; cur; env]] | |
M[mjoin[mr; matches]; cur; env]} | |
TI[`Opt; "Match 0 or 1 repetitions."]; | |
match_Opt:{[mr; node; cur; env] | |
do_match[mr; Alt(node.item; Emp); cur; env]} | |
TI[`Orp; "Match 0 or more repetitions."]; | |
match_Orp:{[mr; node; cur; env] | |
do_match[mr; Opt(Rep node.item); cur; env]} | |
/ -- grammar to string helper --------------------------------- | |
g2s:{[g] k:g.KIND / convert grammar to string | |
:[k=`Emp; :"^" | |
k=`Not; :"~(",(g2s g.item),")" | |
k=`Lit; :g.item | |
k=`Alt; :"(",join["|"; g2s'g.list],")" | |
k=`Seq; :,/g2s'g.list | |
k=`Rep; :(g2s g.item),"+" | |
k=`Opt; :(g2s g.item),"?" | |
k=`Orp; :(g2s g.item),"*" | |
' "{UNKNOWN KIND: ",($k),"}"]} | |
/ -- test framework ------------------------------------------- | |
echo: {`0: ($x),"\n"}; nt:0; np:0; | |
assert:{[t;msg] nt::nt+1; if[t; `0:"."; np::np+1]; if[~t; echo "ERROR: ", msg]} | |
shM:{[pat;str] assert[~`FAIL~match[`mr;pat;str]; "'",(g2s pat),"' should match '",str,"'"]} | |
shF:{[pat;str] assert[`FAIL~match[`mr;pat;str]; "'",(g2s pat),"' should fail on '",str,"'"]} | |
cheq:{[a;b] assert[a~b;"(",($a),") != (",($b),")"]} / CHeck EQuality | |
report:{ echo "\nRan ",($nt)," tests. ",($np)," tests passed."} | |
/ -- unit tests ----------------------------------------------- | |
a:Lit "A"; b:Lit "B"; c: Lit "C" | |
cheq["A"; g2s a] | |
cheq["(A|B)"; g2s Alt(a;b)] | |
cheq["AB"; g2s Seq(a;b)] | |
shM[Emp; "anything"] | |
shM[a; "ABC"]; shF[b; "ABC"] | |
shM[Alt(a;b); "ABC"]; shM[Alt(b;a); "ABC"]; shF[Alt(c;b); "ABC"] | |
shM[Seq(a;b); "ABC"]; shF[Seq(b;a); "ABC"]; shM[Seq(a;Alt(a;b)); "AB"] | |
shM[Seq(Rep a;b); "AAAB"]; shF[Rep a; ""] | |
shM[Seq(Opt a;b); "AB"]; shM[Seq(Opt a;b); "BA"]; shF[Seq(Opt a;c); "BA"]; | |
report[] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment