Created
December 6, 2023 03:13
-
-
Save danvk/c8de56cc9723cdebf9148fd6720985e0 to your computer and use it in GitHub Desktop.
Lisp Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { assertEquals } from "https://deno.land/[email protected]/assert/mod.ts"; | |
import { parseLisp, tokenize } from "./parser.ts"; | |
Deno.test("tokenizer", () => { | |
const input = "(first (list 1 (+ 2 3) 9))"; | |
assertEquals( | |
[...tokenize(input)], | |
["(", "first", "(", "list", 1, "(", "+", 2, 3, ")", 9, ")", ")"], | |
); | |
}); | |
Deno.test("LISP parser", () => { | |
const input = "(first (list 1 (+ 2 3) 9))"; | |
assertEquals(parseLisp(input), ["first", ["list", 1, ["+", 2, 3], 9]]); | |
assertEquals(parseLisp("((lambda x (+ x x)) 10)"), [ | |
["lambda", "x", ["+", "x", "x"]], | |
10, | |
]); | |
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env -S deno run | |
// Run either via "./parser.ts" or "deno run parser.ts" | |
export type AST = number | string | AST[]; | |
const SPECIAL_CHARS = " \n\t()"; | |
export function* tokenize(text: string) { | |
while (text) { | |
const c = text[0]; | |
if (SPECIAL_CHARS.includes(c)) { | |
text = text.slice(1); | |
if (c === " " || c === "\n" || c === "\t") { | |
// ignore whitespace | |
continue; | |
} else if (c === "(" || c === ")") { | |
yield c; | |
continue; | |
} | |
} else { | |
// scan until next special character | |
let pos = 1; | |
for (; pos < text.length; pos++) { | |
if (SPECIAL_CHARS.includes(text.charAt(pos))) { | |
break; | |
} | |
} | |
const tokenText = text.slice(0, pos); | |
if (!isNaN(Number(tokenText))) { | |
yield Number(tokenText); | |
} else { | |
yield tokenText; | |
} | |
text = text.slice(pos); | |
} | |
} | |
} | |
// firstToken is the current token, stream is a stream of remaining tokens. | |
export function parseStream( | |
firstToken: string | number, | |
stream: Generator<string | number>, | |
): AST { | |
if (firstToken === "(") { | |
// recursively parse each expression in the list until we hit our ')'. | |
const result: AST = []; | |
while (true) { | |
const v = stream.next(); | |
if (v.done) { | |
throw new Error("Premature end of token stream"); | |
} | |
const token = v.value; | |
if (token === ")") { | |
return result; | |
} | |
// either a primitive or another list; let the recursive call figure it out. | |
result.push(parseStream(token, stream)); | |
} | |
} else if (firstToken === ")") { | |
throw new Error('Surprise ")" token'); | |
} else { | |
return firstToken; | |
} | |
} | |
export function parseLisp(text: string): AST { | |
const stream = tokenize(text); | |
const first = stream.next(); | |
if (first.done) { | |
throw new Error("Empty string is invalid"); | |
} | |
return parseStream(first.value, stream); | |
} | |
if (import.meta.main) { | |
// [ "first", [ "list", 1, [ "+", 2, 3 ], 9 ] ] | |
let input = "(first (list 1 (+ 2 3) 9))"; | |
console.log(parseLisp(input)); | |
// [ [ "lambda", "x", [ "+", "x", "x" ] ], 10 ] | |
input = "((lambda x (+ x x)) 10)"; | |
console.log(parseLisp(input)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment