danvk · December 6, 2023 03:13
diff --git a/parser.test.ts b/parser.test.ts
 import { assertEquals } from "https://deno.land/[email protected]/assert/mod.ts";

 import { parseLisp, tokenize } from "./parser.ts";

 Deno.test("tokenizer", () => {
  const input = "(first (list 1 (+ 2 3) 9))";
  assertEquals(
    [...tokenize(input)],
    ["(", "first", "(", "list", 1, "(", "+", 2, 3, ")", 9, ")", ")"],
  );
 });

 Deno.test("LISP parser", () => {
  const input = "(first (list 1 (+ 2 3) 9))";
  assertEquals(parseLisp(input), ["first", ["list", 1, ["+", 2, 3], 9]]);

  assertEquals(parseLisp("((lambda x (+ x x)) 10)"), [
    ["lambda", "x", ["+", "x", "x"]],
    10,
  ]);
 });
diff --git a/parser.ts b/parser.ts
 #!/usr/bin/env -S deno run
 // Run either via "./parser.ts" or "deno run parser.ts"

 export type AST = number | string | AST[];

 const SPECIAL_CHARS = " \n\t()";
 export function* tokenize(text: string) {
  while (text) {
    const c = text[0];

    if (SPECIAL_CHARS.includes(c)) {
      text = text.slice(1);
      if (c === " " || c === "\n" || c === "\t") {
        // ignore whitespace
        continue;
      } else if (c === "(" || c === ")") {
        yield c;
        continue;
      }
    } else {
      // scan until next special character
      let pos = 1;
      for (; pos < text.length; pos++) {
        if (SPECIAL_CHARS.includes(text.charAt(pos))) {
          break;
        }
      }
      const tokenText = text.slice(0, pos);
      if (!isNaN(Number(tokenText))) {
        yield Number(tokenText);
      } else {
        yield tokenText;
      }
      text = text.slice(pos);
    }
  }
 }

 // firstToken is the current token, stream is a stream of remaining tokens.
 export function parseStream(
  firstToken: string | number,
  stream: Generator<string | number>,
 ): AST {
  if (firstToken === "(") {
    // recursively parse each expression in the list until we hit our ')'.
    const result: AST = [];
    while (true) {
      const v = stream.next();
      if (v.done) {
        throw new Error("Premature end of token stream");
      }
      const token = v.value;
      if (token === ")") {
        return result;
      }
      // either a primitive or another list; let the recursive call figure it out.
      result.push(parseStream(token, stream));
    }
  } else if (firstToken === ")") {
    throw new Error('Surprise ")" token');
  } else {
    return firstToken;
  }
 }

 export function parseLisp(text: string): AST {
  const stream = tokenize(text);
  const first = stream.next();
  if (first.done) {
    throw new Error("Empty string is invalid");
  }
  return parseStream(first.value, stream);
 }

 if (import.meta.main) {
  // [ "first", [ "list", 1, [ "+", 2, 3 ], 9 ] ]
  let input = "(first (list 1 (+ 2 3) 9))";
  console.log(parseLisp(input));

  // [ [ "lambda", "x", [ "+", "x", "x" ] ], 10 ]
  input = "((lambda x (+ x x)) 10)";
  console.log(parseLisp(input));
 }
	import { assertEquals } from "https://deno.land/[email protected]/assert/mod.ts";

	import { parseLisp, tokenize } from "./parser.ts";

	Deno.test("tokenizer", () => {
	const input = "(first (list 1 (+ 2 3) 9))";
	assertEquals(
	[...tokenize(input)],
	["(", "first", "(", "list", 1, "(", "+", 2, 3, ")", 9, ")", ")"],
	);
	});

	Deno.test("LISP parser", () => {
	const input = "(first (list 1 (+ 2 3) 9))";
	assertEquals(parseLisp(input), ["first", ["list", 1, ["+", 2, 3], 9]]);

	assertEquals(parseLisp("((lambda x (+ x x)) 10)"), [
	["lambda", "x", ["+", "x", "x"]],
	10,
	]);
	});
	#!/usr/bin/env -S deno run
	// Run either via "./parser.ts" or "deno run parser.ts"

	export type AST = number \| string \| AST[];

	const SPECIAL_CHARS = " \n\t()";
	export function* tokenize(text: string) {
	while (text) {
	const c = text[0];

	if (SPECIAL_CHARS.includes(c)) {
	text = text.slice(1);
	if (c === " " \|\| c === "\n" \|\| c === "\t") {
	// ignore whitespace
	continue;
	} else if (c === "(" \|\| c === ")") {
	yield c;
	continue;
	}
	} else {
	// scan until next special character
	let pos = 1;
	for (; pos < text.length; pos++) {
	if (SPECIAL_CHARS.includes(text.charAt(pos))) {
	break;
	}
	}
	const tokenText = text.slice(0, pos);
	if (!isNaN(Number(tokenText))) {
	yield Number(tokenText);
	} else {
	yield tokenText;
	}
	text = text.slice(pos);
	}
	}
	}

	// firstToken is the current token, stream is a stream of remaining tokens.
	export function parseStream(
	firstToken: string \| number,
	stream: Generator<string \| number>,
	): AST {
	if (firstToken === "(") {
	// recursively parse each expression in the list until we hit our ')'.
	const result: AST = [];
	while (true) {
	const v = stream.next();
	if (v.done) {
	throw new Error("Premature end of token stream");
	}
	const token = v.value;
	if (token === ")") {
	return result;
	}
	// either a primitive or another list; let the recursive call figure it out.
	result.push(parseStream(token, stream));
	}
	} else if (firstToken === ")") {
	throw new Error('Surprise ")" token');
	} else {
	return firstToken;
	}
	}

	export function parseLisp(text: string): AST {
	const stream = tokenize(text);
	const first = stream.next();
	if (first.done) {
	throw new Error("Empty string is invalid");
	}
	return parseStream(first.value, stream);
	}

	if (import.meta.main) {
	// [ "first", [ "list", 1, [ "+", 2, 3 ], 9 ] ]
	let input = "(first (list 1 (+ 2 3) 9))";
	console.log(parseLisp(input));

	// [ [ "lambda", "x", [ "+", "x", "x" ] ], 10 ]
	input = "((lambda x (+ x x)) 10)";
	console.log(parseLisp(input));
	}