Last active
July 12, 2025 01:00
-
-
Save tangentstorm/bc18af38a36c3b6b32630b56529e5780 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> chs('abc')(on('apple')) | |
{ mk: -1, ix: 1, ch: 'p', ib: 'apple', mb: 1 } | |
> chs('abc')(on('zapple')) | |
{ mk: -1, ix: 0, ch: 'z', ib: 'zapple', mb: 0 } | |
> chs('zabc')(on('zapple')) | |
{ mk: -1, ix: 1, ch: 'a', ib: 'zapple', mb: 1 } | |
> chs('abcz')(on('zapple')) | |
{ mk: -1, ix: 1, ch: 'a', ib: 'zapple', mb: 1 } | |
> chs('abcz')(on('')) | |
{ mk: -1, ix: 0, ch: undefined, ib: '', mb: 0 } | |
> seq([chr('a'),chr('b'),chr('c')])(on('x')) | |
{ mk: -1, ix: 0, ch: 'x', ib: 'x', mb: 0 } | |
> seq([chr('a'),chr('b'),chr('c')])(on('abc')) | |
{ mk: -1, ix: 3, ch: undefined, ib: 'abc', mb: 1 } | |
> seq([chr('a'),chr('b'),chr('c')])(on('abcx')) | |
{ mk: -1, ix: 3, ch: 'x', ib: 'abcx', mb: 1 } | |
> seq([chr('a'),chr('b'),chr('c')])(on('abc')) | |
{ mk: -1, ix: 3, ch: undefined, ib: 'abc', mb: 1 } | |
> seq([chr('a'),chr('b'),chr('c'),end])(on('abc')) | |
{ mk: -1, ix: 3, ch: undefined, ib: 'abc', mb: 1 } | |
> seq([chr('a'),chr('b'),chr('c'),end])(on('abcx')) | |
{ mk: -1, ix: 0, ch: 'a', ib: 'abcx', mb: 0 } | |
> alt([chr('a'),chr('b')])(on('xabc')) | |
{ mk: -1, ix: 0, ch: 'x', ib: 'xabc', mb: 0 } | |
> alt([chr('a'),chr('b')])(on('abc')) | |
{ mk: -1, ix: 1, ch: 'b', ib: 'abc', mb: 1 } | |
> alt([chr('a'),chr('b')])(on('xabc')) | |
{ mk: -1, ix: 0, ch: 'x', ib: 'xabc', mb: 0 } | |
> alt([chr('a'),chr('x')])(on('xabc')) | |
{ mk: -1, ix: 1, ch: 'a', ib: 'xabc', mb: 1 } | |
> alt([chr('a'),chr('b')])(on('bac')) | |
{ mk: -1, ix: 1, ch: 'a', ib: 'bac', mb: 1 } | |
> alt([chr('a'),chr('b')])(on('')) | |
{ mk: -1, ix: 0, ch: undefined, ib: '', mb: 0 } | |
> alt([chr('a'),chr('b'),end])(on('')) | |
{ mk: -1, ix: 0, ch: undefined, ib: '', mb: 1 } | |
> opt(chr('x'))(on('hello')) | |
{ mk: -1, ix: 0, ch: 'h', ib: 'hello', mb: 1 } | |
> opt(chr('h'))(on('hello')) | |
{ mk: -1, ix: 1, ch: 'e', ib: 'hello', mb: 1 } | |
> lit=cs=>seq([...cs].map(chr)) | |
[Function: lit] | |
> lit('abc')(on('abc')) | |
{ mk: -1, ix: 3, ch: undefined, ib: 'abc', mb: 1 } | |
> lit('abc')(on('xabc')) | |
{ mk: -1, ix: 0, ch: 'x', ib: 'xabc', mb: 0 } | |
> lit('abc')(on('abcx')) | |
{ mk: -1, ix: 3, ch: 'x', ib: 'abcx', mb: 1 } | |
> not(lit('cat'))(on('bat')) | |
{ mk: -1, ix: 1, ch: 'a', ib: 'bat', mb: 1 } | |
> not(lit('cat'))(on('catbat')) | |
{ mk: -1, ix: 0, ch: 'c', ib: 'catbat', mb: 0 } | |
> not(lit('cat'))(on('catch')) | |
{ mk: -1, ix: 0, ch: 'c', ib: 'catch', mb: 0 } | |
> not(lit('cat'))(on('match')) | |
{ mk: -1, ix: 1, ch: 'a', ib: 'match', mb: 1 } | |
> rep(chr('a'))(on('apple')) | |
{ mk: -1, ix: 1, ch: 'p', ib: 'apple', mb: 1 } | |
> rep(chr('a'))(on('apple')) | |
{ mk: -1, ix: 1, ch: 'p', ib: 'apple', mb: 1 } | |
> rep(chr('a'))(on('aardvark')) | |
{ mk: -1, ix: 2, ch: 'r', ib: 'aardvark', mb: 1 } | |
> rep(chr('a'))(on('potato')) | |
{ mk: -1, ix: 0, ch: 'p', ib: 'potato', mb: 0 } | |
> orp(chr('a'))(on('apple')) | |
{ mk: -1, ix: 1, ch: 'p', ib: 'apple', mb: 1 } | |
> orp(chr('a'))(on('aardvark')) | |
{ mk: -1, ix: 2, ch: 'r', ib: 'aardvark', mb: 1 } | |
> orp(chr('a'))(on('potato')) | |
{ mk: -1, ix: 0, ch: 'p', ib: 'potato', mb: 1 } | |
> sep(any, chr(','))(on('a,b,c')) | |
{ mb: 1, ib: 'a,b,c', ch: undefined, ix: 5, mk: -1 } | |
> sep(any, chr(','))(on('a,bb,c')) | |
{ mb: 1, ib: 'a,bb,c', ch: 'b', ix: 3, mk: -1 } | |
// these two should both match, but tokenize differently | |
// (if i had tok() yet) | |
> sep(rep(any), chr(','))(on('a,bb,c')) | |
{ mb: 1, ib: 'a,bb,c', ch: undefined, ix: 6, mk: -1 } | |
> sep(rep(not(chr(','))), chr(','))(on('a,bb,c')) | |
{ mb: 1, ib: 'a,bb,c', ch: undefined, ix: 6, mk: -1 } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// parser combinators in j | |
// -- general struct helpers --------------------- | |
let set=(o,k,v)=>{let r={...o}; r[k]=v; return r} | |
// gs(k)(o)=>o[k] gs(k)(v,o)=>set(o,k,v) | |
// the odd order is so that the value can be | |
// right next to the setter and we get something | |
// like a pipeline with a bunch of parens on the right | |
let gs=k=>(x,y)=> y===undefined ? x[k] : set(y,k,x) | |
// -- parse state -------------------------------- | |
// getters and setters | |
// todo: add some kind of validator/type coercion for e.g., mb(true) | |
let mb=gs('mb'), // match bit | |
ib=gs('ib'), // input buffer | |
ix=gs('ix'), // index into input buffer | |
ch=gs('ch'), // current character (ib[ix]) | |
mk=gs('mk'); // start index of current token | |
let s0=()=>({mb:0,ib:'',ch:'',ix:-1,mk:-1}) | |
on=s=>ix(0,ch(s[0],ib(s,s0()))) | |
m1=s=>mb(1,s) // match | |
m0=s=>mb(0,s) // fail | |
// move to next ch | |
let nx=s=>{let i=1+ix(s),b=ib(s); return ch(b[i], ix(i, s))} | |
// test: nx(nx(on('abc'))) | |
// call nx n time;, n may be boolean | |
fw=(n,s)=>{for(let i=0;i<n;i++)s=nx(s); return s} | |
// -- parser combinators ------------------------- | |
emp = m1 // always matches, consumes nothing | |
// fwm : foward match n chars if n > 0, else fail | |
fwm=(n,s)=>mb(+n, fw(+n,s)) | |
// any: match any character, fail at end of input | |
any=s=>{let m=ix(s)<ib(s).length; return mb(+m, fw(m, s))} | |
any=s=>fwm(ix(s)<ib(s).length, s) | |
// neg: invert the match bit | |
neg=p=>s=>mb(1-mb(p(s)),s) | |
// test: neg(any)(on('x')) | |
// end : matches end of input | |
end=neg(any) | |
// chr : match a single character | |
chr=c=>s=>fwm(c==ib(s)[ix(s)],s) | |
// chs : choose / character set. match 1 char if it's in cs | |
chs=cs=>s=>fwm(cs.includes(ib(s)[ix(s)]),s) | |
// seq : match all patterns in sequence | |
seq=ps=>s=>{let r=s;for(p of ps){r=p(r);if(!mb(r))return m0(s)}return r} | |
// alt : accept the first matching pattern, or fail | |
alt=ps=>s=>{for(p of ps){let r=p(s);if(mb(r))return r} return m0(s)} | |
// opt: optional (kleene "?") | |
opt=p=>alt([p,emp]) | |
// lit: shorthand for seq(chr,chr,chr,chr) | |
lit=cs=>seq([...cs].map(chr)) | |
// more parser combinators | |
// rep : repeat p 1 or more times (klene "+") | |
// rep =: {{ y (<&ix mb ]) u^:mb^:_ I y }} | |
rep=p=>s=>{let r=m1(s);while(mb(r)){r=p(r)}return mb(+(ix(s)<ix(r)),r)} | |
// orp : repeat p 0 or more times (optional repeat) (kleene "*") | |
orp=p=>alt([rep(p), emp]) | |
// not : match any character unless p matches | |
not=p=> seq([neg(p),any]) | |
// sep : match 1 or more p, separated by d (both are patterns) | |
sep=(p,d)=>seq([p, orp(seq([d, p]))]) | |
sep(any,chr()) | |
// -- todo --------------------------------------- | |
// j/k style functional expression | |
// (so you don't have to balance parens) | |
// jk(ix(0), ch(s[0]), ib(s), s0()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment