Skip to content

Instantly share code, notes, and snippets.

@SimonMeskens
Last active February 5, 2025 01:09
Show Gist options
  • Save SimonMeskens/aab3d22a66df337d6347b292bcad016b to your computer and use it in GitHub Desktop.
Save SimonMeskens/aab3d22a66df337d6347b292bcad016b to your computer and use it in GitHub Desktop.
JavaScript climbing parser
// Copyright 2023 Simon Meskens
// Licensed under the MIT License
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided this license is
// preserved. This software is offered as-is, without any warranty.
const types = [
[null, /[^\P{Pattern_White_Space}\n]+/msuy],
["break", /\n/msuy],
["string", /"((?:[^"\\/\b\f\n\r\t]|\\u\d{4})*)"/msuy],
["delimiter", /[(){}[\]]/msuy],
["number", /-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?/msuy],
["operator", /\p{Pattern_Syntax}+/msuy],
["identifier", /\p{XID_Start}\p{XID_Continue}*/msuy],
["unknown", /[^\p{Pattern_White_Space}\p{Pattern_Syntax}\p{XID_Start}]+/msuy],
];
export function lex(str, grammar = types) {
let cursor = 0;
let tokens = [];
while (cursor < str.length) {
let matched = 0;
for (const [type, pattern] of grammar) {
pattern.lastIndex = cursor;
if (pattern.test(str)) {
if (type != null)
tokens.push({ type, value: str.slice(cursor, pattern.lastIndex) });
matched = pattern.lastIndex - cursor;
break;
}
}
if (matched === 0) throw new SyntaxError("Lexer error");
else cursor += matched;
}
return tokens;
}
// Copyright 2023 Simon Meskens
// Licensed under the MIT License
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided this license is
// preserved. This software is offered as-is, without any warranty.
import { peekable, nextValue, peekValue } from "./peekable.js";
import { parent, literal, hub } from "./tree.js";
// Operator example
const operators = [
{
"+": { arity: 2, associativity: "left" },
"-": { arity: 2, associativity: "left" },
},
{
"*": { arity: 2, associativity: "left" },
"/": { arity: 2, associativity: "left" },
},
{
"-": { arity: 1, associativity: "right" },
"+": { arity: 1, associativity: "right" },
},
{
"**": { arity: 2, associativity: "right" },
},
{
"()": { left: "(", right: ")" },
},
];
export function parse(tokens, grammar = operators) {
const iterator = peekable(tokens[Symbol.iterator]());
const dictionary = {
prefix: {},
postfix: {},
delimiters: {},
};
for (let precedence = 0; precedence < grammar.length; precedence++) {
for (const [key, operator] of Object.entries(grammar[precedence])) {
operator.precedence = precedence + 1;
operator.value = key;
if ("left" in operator) dictionary.delimiters[operator.left] = operator;
else if (operator.arity === 1) dictionary.prefix[key] = operator;
else if (operator.arity === 2) dictionary.postfix[key] = operator;
}
}
const statements = parent("juxtaposition", []);
while (peekValue(iterator) !== null) {
const exp = expression(dictionary, iterator, 0);
if (exp !== null) statements.children.push(exp);
}
if (iterator.next().done === true) {
if (statements.children.length === 1) return statements.children[0];
else return statements;
} else throw new SyntaxError();
}
function isJuxtaposable(token, dictionary) {
return (
token !== null &&
!(token.type === "break") &&
(token.type !== "delimiter" || token.value in dictionary.delimiters) &&
!isPostfixOperator(token, dictionary)
);
}
function isPostfixOperator(operator, dictionary) {
return (
operator !== null &&
operator.type === "operator" &&
operator.value in dictionary.postfix
);
}
function expression(dictionary, iterator, precedence) {
let tree = prefix(dictionary, iterator);
if (tree === null) return null;
for (
let next = peekValue(iterator);
isPostfixOperator(next, dictionary) &&
dictionary.postfix[next.value].precedence >= precedence;
next = peekValue(iterator)
) {
const operator = dictionary.postfix[nextValue(iterator).value];
const adjPrecedence =
operator.associativity === "left"
? operator.precedence + 1
: operator.precedence;
tree = hub("operation", operator.value, [
tree,
expression(dictionary, iterator, adjPrecedence),
]);
next = peekValue(iterator);
}
const juxtaposition = parent("juxtaposition", [tree]);
while (isJuxtaposable(peekValue(iterator), dictionary)) {
const exp = expression(dictionary, iterator, 0);
if (exp !== null)
if (exp.type === "juxtaposition")
juxtaposition.children.push(...exp.children);
else juxtaposition.children.push(exp);
}
if (juxtaposition.children.length === 1) return tree;
else return juxtaposition;
}
function prefix(dictionary, iterator) {
const next = peekValue(iterator);
if (next.type === "operator" && next.value in dictionary.prefix) {
const operator = dictionary.prefix[nextValue(iterator).value];
const child = expression(dictionary, iterator, operator.precedence);
return hub("operation", operator.value, [child]);
} else if (next.type === "delimiter" && next.value in dictionary.delimiters) {
const { left, right, value } =
dictionary.delimiters[nextValue(iterator).value];
const inner = expression(dictionary, iterator, 0);
if (nextValue(iterator).value === right)
return hub("group", left + right, [inner]);
else throw new SyntaxError(`Unexpected end of group: ${value}`);
} else if (next.type === "identifier") {
const { value } = nextValue(iterator);
return literal("identifier", value);
} else if (next.type === "string" || next.type === "number") {
const token = nextValue(iterator);
const valueType = token.type;
const value = JSON.parse(token.value);
return { type: "literal", valueType, value };
} else if (next.type === "break") {
nextValue(iterator);
return null;
}
throw new SyntaxError(
`Unknown token: ${JSON.stringify(next.value)} (${next.type})`,
);
}
// Copyright 2023 Simon Meskens
// Licensed under the MIT License
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided this license is
// preserved. This software is offered as-is, without any warranty.
export function peekable(iterator) {
if (iterator.peek) return iterator;
let next = iterator.next();
return {
next() {
const n = next;
next = iterator.next();
return n;
},
peek: () => next,
};
}
export function nextValue(iterator) {
const { done, value } = iterator.next();
if (!done) return value;
else return null;
}
export function peekValue(iterator) {
const { done, value } = iterator.peek();
if (!done) return value;
else return null;
}
// Copyright 2023 Simon Meskens
// Licensed under the MIT License
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided this license is
// preserved. This software is offered as-is, without any warranty.
export const node = (type, position) =>
position ? { type, position } : { type };
export const literal = (type, value, position) =>
position ? { type, value, position } : { type, value };
export const parent = (type, children, position) =>
position ? { type, children, position } : { type, children };
export const hub = (type, value, children, position) =>
position ? { type, value, children, position } : { type, value, children };
export const point = (line, column, offset) =>
offset !== undefined ? { line, column, offset } : { line, column };
export const position = (start, end) => {
if (Array.isArray(start)) start = point(...start);
if (Array.isArray(end)) end = point(...end);
return { start, end };
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment