-
-
Save glebec/572196e2ca30d9afe09c38b4e9d2b227 to your computer and use it in GitHub Desktop.
| /** | |
| * Minimal demo of parser combinators, possibly as a target for recent | |
| * JS web dev graduates to implement as an exercise. | |
| * | |
| * Huge credit to Hutton, Meijer, and Swierstra for their papers | |
| * on the subject. | |
| */ | |
| class Parser { | |
| // :: (String -> { result: a, remaining: String } | Null) -> Parser a | |
| constructor (parserFn) { | |
| this._parserFn = parserFn | |
| } | |
| // :: (String -> { result: a, remaining: String } | Null) -> Parser a | |
| static from (parserFn) { | |
| return new Parser(parserFn) | |
| } | |
| // :: Parser a ~> String -> { result: a, remaining: String } | Null | |
| parse (string) { | |
| return this._parserFn(string) | |
| } | |
| // :: String -> Parser String | |
| static literal (string) { | |
| return Parser.from(tokens => { | |
| if (!tokens.startsWith(string)) return null | |
| return { | |
| result: string, | |
| remaining: tokens.slice(string.length), | |
| } | |
| }) | |
| } | |
| // :: Parser a ~> Parser b -> Parser (a | b) | |
| or (p2) { | |
| return Parser.from(tokens => this.parse(tokens) || p2.parse(tokens)) | |
| } | |
| // :: ...Parser * -> Parser * | |
| static any (...ps) { | |
| return ps.reduce((anyP, p) => anyP.or(p)) | |
| } | |
| // :: (* -> Parser a) -> Parser a | |
| static lazy (mkParser) { | |
| return Parser.from(tokens => mkParser().parse(tokens)) | |
| } | |
| // :: a -> Parser a | |
| static of (value) { // aka unit, pure, return, inject | |
| return Parser.from(string => ({ | |
| result: value, | |
| remaining: string, | |
| })) | |
| } | |
| // :: Parser a ~> (a -> Parser b) -> Parser b | |
| chain (step) { // aka bind, then, flatMap | |
| return Parser.from(tokens => { | |
| const res1 = this.parse(tokens) | |
| if (!res1) return null | |
| const p2 = step(res1.result) | |
| return p2.parse(res1.remaining) | |
| }) | |
| } | |
| // :: Parser a ~> (a -> b) -> Parser b | |
| map (f) { | |
| return this.chain(x => Parser.of(f(x))) | |
| } | |
| // :: Parser a -> Parser [a] | |
| static many0 (p1) { | |
| return p1.chain(r => { | |
| return Parser.many0(p1).chain(rs => { | |
| return Parser.of([r, ...rs]) | |
| }) | |
| }).or(Parser.of([])) | |
| } | |
| // :: Parser a -> Parser [a] | |
| static many1 (p1) { | |
| return p1.chain(r => { | |
| return Parser.many0(p1).chain(rs => { | |
| return Parser.of([r, ...rs]) | |
| }) | |
| }) | |
| } | |
| // :: Parser a ~> Parser b -> Parser b | |
| useRight (p2) { | |
| return this.chain(() => p2) | |
| } | |
| // :: Parser a ~> Parser b -> Parser a | |
| useLeft (p2) { | |
| return this.chain(left => p2.chain(() => Parser.of(left))) | |
| } | |
| } | |
| const P = Parser | |
| /** | |
| * Demonstration using math expressions | |
| */ | |
| const DIGIT = // :: Parser Number | |
| P.any(...'0123456789'.split('').map(P.literal)) | |
| const SPACE = // :: Parser String | |
| P.many0(P.literal(' ')) | |
| const NUM = // :: Parser Number | |
| P.many1(DIGIT) | |
| .map(nums => +nums.join('')) | |
| const FACTOR = // :: Parser Number | |
| P.any( | |
| P.literal('(') | |
| .useRight(SPACE) | |
| .useRight(P.lazy(() => EXPR)) | |
| .useLeft(SPACE) | |
| .useLeft(P.literal(')')), | |
| P.literal('-') | |
| .useRight(P.lazy(() => FACTOR)) | |
| .map(n => -n), | |
| NUM | |
| ) | |
| const F2 = // :: Parser Number | |
| P.any( | |
| SPACE | |
| .useRight(P.literal('*')) | |
| .useRight(SPACE) | |
| .useRight(FACTOR) | |
| .chain(n1 => F2 | |
| .map(n2 => n1 * n2)), | |
| SPACE | |
| .useRight(P.literal('/')) | |
| .useRight(SPACE) | |
| .useRight(FACTOR) | |
| .chain(n1 => F2 | |
| .map(n2 => 1/n1 * n2)), | |
| P.of(1) | |
| ) | |
| const TERM = // :: Parser Number | |
| FACTOR | |
| .chain(n1 => F2 | |
| .map(n2 => n1 * n2)) | |
| const T2 = // :: Parser Number | |
| P.any( | |
| SPACE | |
| .useRight(P.literal('+')) | |
| .useRight(SPACE) | |
| .useRight(TERM) | |
| .chain(n1 => T2 | |
| .map(n2 => n1 + n2)), | |
| SPACE | |
| .useRight(P.literal('-')) | |
| .useRight(SPACE) | |
| .useRight(TERM) | |
| .chain(n1 => T2 | |
| .map(n2 => -n1 + n2)), | |
| P.of(0) | |
| ) | |
| const EXPR = // :: Parser Number | |
| TERM | |
| .chain(n1 => T2 | |
| .map(n2 => n1 + n2)) | |
| const { result } = EXPR.parse('-5 * -(4 + -2) / (0 + 5) - 3 * 2 is pretty cool') | |
| console.log(result) // -4 |
Since a lot of the methods in this class are static, have you considered simply using an object with functions of the appropriate type? In other words:
// :: type Parser a = String -> [(a, String)]
const Parser = (() => {
// ...
// :: (a -> Parser b) -> Parser a -> Parser b
const chain = f => p => s => p(s).reduce((r, [a, s_]) => [...r, ...f(a)(s_)], [])
return { ..., chain }
})()@masaeedu that's a good point and I will think on it, but the main reason I used a class was to have infix methods. Your chain takes both parsers explicitly, which is precisely what I wanted to avoid as now you've transformed an infix position (p1.chain(makeP2)) into prefix (chain(makeP2, p1)). This isn't ideal for things like useRight: p1.useRight(p2).useRight(p3).useLeft(p4).useLeft(p5) becomes useLeft(useLeft(useRight(useRight(p1, p2), p3), p4), p5).
Slack followup from masaeedu:
You can actually have your cake and eat it too, with
p1 |> chain(makeP2)orFn.pipe([p1, useRight(p2), useRight(p3), ...]), etc. (lots of different flavors of this)
Another good point, but that layers on even more new concepts for exercise-takers to tackle while also understanding parsers and chain. Might make a good refactoring challenge though.
Moved to GitHub repo
Notes to self on exercise design:
chainregexas a parser, to get around having to implementmany0&many1useRightanduseLeftmany0andmany1and drop all uses ofregexmap.