Skip to content

Instantly share code, notes, and snippets.

@tangentstorm
Last active July 12, 2025 01:00
Show Gist options
  • Save tangentstorm/bc18af38a36c3b6b32630b56529e5780 to your computer and use it in GitHub Desktop.
Save tangentstorm/bc18af38a36c3b6b32630b56529e5780 to your computer and use it in GitHub Desktop.
> chs('abc')(on('apple'))
{ mk: -1, ix: 1, ch: 'p', ib: 'apple', mb: 1 }
> chs('abc')(on('zapple'))
{ mk: -1, ix: 0, ch: 'z', ib: 'zapple', mb: 0 }
> chs('zabc')(on('zapple'))
{ mk: -1, ix: 1, ch: 'a', ib: 'zapple', mb: 1 }
> chs('abcz')(on('zapple'))
{ mk: -1, ix: 1, ch: 'a', ib: 'zapple', mb: 1 }
> chs('abcz')(on(''))
{ mk: -1, ix: 0, ch: undefined, ib: '', mb: 0 }
> seq([chr('a'),chr('b'),chr('c')])(on('x'))
{ mk: -1, ix: 0, ch: 'x', ib: 'x', mb: 0 }
> seq([chr('a'),chr('b'),chr('c')])(on('abc'))
{ mk: -1, ix: 3, ch: undefined, ib: 'abc', mb: 1 }
> seq([chr('a'),chr('b'),chr('c')])(on('abcx'))
{ mk: -1, ix: 3, ch: 'x', ib: 'abcx', mb: 1 }
> seq([chr('a'),chr('b'),chr('c')])(on('abc'))
{ mk: -1, ix: 3, ch: undefined, ib: 'abc', mb: 1 }
> seq([chr('a'),chr('b'),chr('c'),end])(on('abc'))
{ mk: -1, ix: 3, ch: undefined, ib: 'abc', mb: 1 }
> seq([chr('a'),chr('b'),chr('c'),end])(on('abcx'))
{ mk: -1, ix: 0, ch: 'a', ib: 'abcx', mb: 0 }
> alt([chr('a'),chr('b')])(on('xabc'))
{ mk: -1, ix: 0, ch: 'x', ib: 'xabc', mb: 0 }
> alt([chr('a'),chr('b')])(on('abc'))
{ mk: -1, ix: 1, ch: 'b', ib: 'abc', mb: 1 }
> alt([chr('a'),chr('b')])(on('xabc'))
{ mk: -1, ix: 0, ch: 'x', ib: 'xabc', mb: 0 }
> alt([chr('a'),chr('x')])(on('xabc'))
{ mk: -1, ix: 1, ch: 'a', ib: 'xabc', mb: 1 }
> alt([chr('a'),chr('b')])(on('bac'))
{ mk: -1, ix: 1, ch: 'a', ib: 'bac', mb: 1 }
> alt([chr('a'),chr('b')])(on(''))
{ mk: -1, ix: 0, ch: undefined, ib: '', mb: 0 }
> alt([chr('a'),chr('b'),end])(on(''))
{ mk: -1, ix: 0, ch: undefined, ib: '', mb: 1 }
> opt(chr('x'))(on('hello'))
{ mk: -1, ix: 0, ch: 'h', ib: 'hello', mb: 1 }
> opt(chr('h'))(on('hello'))
{ mk: -1, ix: 1, ch: 'e', ib: 'hello', mb: 1 }
> lit=cs=>seq([...cs].map(chr))
[Function: lit]
> lit('abc')(on('abc'))
{ mk: -1, ix: 3, ch: undefined, ib: 'abc', mb: 1 }
> lit('abc')(on('xabc'))
{ mk: -1, ix: 0, ch: 'x', ib: 'xabc', mb: 0 }
> lit('abc')(on('abcx'))
{ mk: -1, ix: 3, ch: 'x', ib: 'abcx', mb: 1 }
> not(lit('cat'))(on('bat'))
{ mk: -1, ix: 1, ch: 'a', ib: 'bat', mb: 1 }
> not(lit('cat'))(on('catbat'))
{ mk: -1, ix: 0, ch: 'c', ib: 'catbat', mb: 0 }
> not(lit('cat'))(on('catch'))
{ mk: -1, ix: 0, ch: 'c', ib: 'catch', mb: 0 }
> not(lit('cat'))(on('match'))
{ mk: -1, ix: 1, ch: 'a', ib: 'match', mb: 1 }
> rep(chr('a'))(on('apple'))
{ mk: -1, ix: 1, ch: 'p', ib: 'apple', mb: 1 }
> rep(chr('a'))(on('apple'))
{ mk: -1, ix: 1, ch: 'p', ib: 'apple', mb: 1 }
> rep(chr('a'))(on('aardvark'))
{ mk: -1, ix: 2, ch: 'r', ib: 'aardvark', mb: 1 }
> rep(chr('a'))(on('potato'))
{ mk: -1, ix: 0, ch: 'p', ib: 'potato', mb: 0 }
> orp(chr('a'))(on('apple'))
{ mk: -1, ix: 1, ch: 'p', ib: 'apple', mb: 1 }
> orp(chr('a'))(on('aardvark'))
{ mk: -1, ix: 2, ch: 'r', ib: 'aardvark', mb: 1 }
> orp(chr('a'))(on('potato'))
{ mk: -1, ix: 0, ch: 'p', ib: 'potato', mb: 1 }
> sep(any, chr(','))(on('a,b,c'))
{ mb: 1, ib: 'a,b,c', ch: undefined, ix: 5, mk: -1 }
> sep(any, chr(','))(on('a,bb,c'))
{ mb: 1, ib: 'a,bb,c', ch: 'b', ix: 3, mk: -1 }
// these two should both match, but tokenize differently
// (if i had tok() yet)
> sep(rep(any), chr(','))(on('a,bb,c'))
{ mb: 1, ib: 'a,bb,c', ch: undefined, ix: 6, mk: -1 }
> sep(rep(not(chr(','))), chr(','))(on('a,bb,c'))
{ mb: 1, ib: 'a,bb,c', ch: undefined, ix: 6, mk: -1 }
// parser combinators in j
// -- general struct helpers ---------------------
let set=(o,k,v)=>{let r={...o}; r[k]=v; return r}
// gs(k)(o)=>o[k] gs(k)(v,o)=>set(o,k,v)
// the odd order is so that the value can be
// right next to the setter and we get something
// like a pipeline with a bunch of parens on the right
let gs=k=>(x,y)=> y===undefined ? x[k] : set(y,k,x)
// -- parse state --------------------------------
// getters and setters
// todo: add some kind of validator/type coercion for e.g., mb(true)
let mb=gs('mb'), // match bit
ib=gs('ib'), // input buffer
ix=gs('ix'), // index into input buffer
ch=gs('ch'), // current character (ib[ix])
mk=gs('mk'); // start index of current token
let s0=()=>({mb:0,ib:'',ch:'',ix:-1,mk:-1})
on=s=>ix(0,ch(s[0],ib(s,s0())))
m1=s=>mb(1,s) // match
m0=s=>mb(0,s) // fail
// move to next ch
let nx=s=>{let i=1+ix(s),b=ib(s); return ch(b[i], ix(i, s))}
// test: nx(nx(on('abc')))
// call nx n time;, n may be boolean
fw=(n,s)=>{for(let i=0;i<n;i++)s=nx(s); return s}
// -- parser combinators -------------------------
emp = m1 // always matches, consumes nothing
// fwm : foward match n chars if n > 0, else fail
fwm=(n,s)=>mb(+n, fw(+n,s))
// any: match any character, fail at end of input
any=s=>{let m=ix(s)<ib(s).length; return mb(+m, fw(m, s))}
any=s=>fwm(ix(s)<ib(s).length, s)
// neg: invert the match bit
neg=p=>s=>mb(1-mb(p(s)),s)
// test: neg(any)(on('x'))
// end : matches end of input
end=neg(any)
// chr : match a single character
chr=c=>s=>fwm(c==ib(s)[ix(s)],s)
// chs : choose / character set. match 1 char if it's in cs
chs=cs=>s=>fwm(cs.includes(ib(s)[ix(s)]),s)
// seq : match all patterns in sequence
seq=ps=>s=>{let r=s;for(p of ps){r=p(r);if(!mb(r))return m0(s)}return r}
// alt : accept the first matching pattern, or fail
alt=ps=>s=>{for(p of ps){let r=p(s);if(mb(r))return r} return m0(s)}
// opt: optional (kleene "?")
opt=p=>alt([p,emp])
// lit: shorthand for seq(chr,chr,chr,chr)
lit=cs=>seq([...cs].map(chr))
// more parser combinators
// rep : repeat p 1 or more times (klene "+")
// rep =: {{ y (<&ix mb ]) u^:mb^:_ I y }}
rep=p=>s=>{let r=m1(s);while(mb(r)){r=p(r)}return mb(+(ix(s)<ix(r)),r)}
// orp : repeat p 0 or more times (optional repeat) (kleene "*")
orp=p=>alt([rep(p), emp])
// not : match any character unless p matches
not=p=> seq([neg(p),any])
// sep : match 1 or more p, separated by d (both are patterns)
sep=(p,d)=>seq([p, orp(seq([d, p]))])
sep(any,chr())
// -- todo ---------------------------------------
// j/k style functional expression
// (so you don't have to balance parens)
// jk(ix(0), ch(s[0]), ib(s), s0())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment