Last active
February 5, 2025 01:09
-
-
Save SimonMeskens/aab3d22a66df337d6347b292bcad016b to your computer and use it in GitHub Desktop.
JavaScript climbing parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2023 Simon Meskens | |
// Licensed under the MIT License | |
// | |
// Permission to use, copy, modify, and/or distribute this software for any | |
// purpose with or without fee is hereby granted, provided this license is | |
// preserved. This software is offered as-is, without any warranty. | |
const types = [ | |
[null, /[^\P{Pattern_White_Space}\n]+/msuy], | |
["break", /\n/msuy], | |
["string", /"((?:[^"\\/\b\f\n\r\t]|\\u\d{4})*)"/msuy], | |
["delimiter", /[(){}[\]]/msuy], | |
["number", /-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?/msuy], | |
["operator", /\p{Pattern_Syntax}+/msuy], | |
["identifier", /\p{XID_Start}\p{XID_Continue}*/msuy], | |
["unknown", /[^\p{Pattern_White_Space}\p{Pattern_Syntax}\p{XID_Start}]+/msuy], | |
]; | |
export function lex(str, grammar = types) { | |
let cursor = 0; | |
let tokens = []; | |
while (cursor < str.length) { | |
let matched = 0; | |
for (const [type, pattern] of grammar) { | |
pattern.lastIndex = cursor; | |
if (pattern.test(str)) { | |
if (type != null) | |
tokens.push({ type, value: str.slice(cursor, pattern.lastIndex) }); | |
matched = pattern.lastIndex - cursor; | |
break; | |
} | |
} | |
if (matched === 0) throw new SyntaxError("Lexer error"); | |
else cursor += matched; | |
} | |
return tokens; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2023 Simon Meskens | |
// Licensed under the MIT License | |
// | |
// Permission to use, copy, modify, and/or distribute this software for any | |
// purpose with or without fee is hereby granted, provided this license is | |
// preserved. This software is offered as-is, without any warranty. | |
import { peekable, nextValue, peekValue } from "./peekable.js"; | |
import { parent, literal, hub } from "./tree.js"; | |
// Operator example | |
const operators = [ | |
{ | |
"+": { arity: 2, associativity: "left" }, | |
"-": { arity: 2, associativity: "left" }, | |
}, | |
{ | |
"*": { arity: 2, associativity: "left" }, | |
"/": { arity: 2, associativity: "left" }, | |
}, | |
{ | |
"-": { arity: 1, associativity: "right" }, | |
"+": { arity: 1, associativity: "right" }, | |
}, | |
{ | |
"**": { arity: 2, associativity: "right" }, | |
}, | |
{ | |
"()": { left: "(", right: ")" }, | |
}, | |
]; | |
export function parse(tokens, grammar = operators) { | |
const iterator = peekable(tokens[Symbol.iterator]()); | |
const dictionary = { | |
prefix: {}, | |
postfix: {}, | |
delimiters: {}, | |
}; | |
for (let precedence = 0; precedence < grammar.length; precedence++) { | |
for (const [key, operator] of Object.entries(grammar[precedence])) { | |
operator.precedence = precedence + 1; | |
operator.value = key; | |
if ("left" in operator) dictionary.delimiters[operator.left] = operator; | |
else if (operator.arity === 1) dictionary.prefix[key] = operator; | |
else if (operator.arity === 2) dictionary.postfix[key] = operator; | |
} | |
} | |
const statements = parent("juxtaposition", []); | |
while (peekValue(iterator) !== null) { | |
const exp = expression(dictionary, iterator, 0); | |
if (exp !== null) statements.children.push(exp); | |
} | |
if (iterator.next().done === true) { | |
if (statements.children.length === 1) return statements.children[0]; | |
else return statements; | |
} else throw new SyntaxError(); | |
} | |
function isJuxtaposable(token, dictionary) { | |
return ( | |
token !== null && | |
!(token.type === "break") && | |
(token.type !== "delimiter" || token.value in dictionary.delimiters) && | |
!isPostfixOperator(token, dictionary) | |
); | |
} | |
function isPostfixOperator(operator, dictionary) { | |
return ( | |
operator !== null && | |
operator.type === "operator" && | |
operator.value in dictionary.postfix | |
); | |
} | |
function expression(dictionary, iterator, precedence) { | |
let tree = prefix(dictionary, iterator); | |
if (tree === null) return null; | |
for ( | |
let next = peekValue(iterator); | |
isPostfixOperator(next, dictionary) && | |
dictionary.postfix[next.value].precedence >= precedence; | |
next = peekValue(iterator) | |
) { | |
const operator = dictionary.postfix[nextValue(iterator).value]; | |
const adjPrecedence = | |
operator.associativity === "left" | |
? operator.precedence + 1 | |
: operator.precedence; | |
tree = hub("operation", operator.value, [ | |
tree, | |
expression(dictionary, iterator, adjPrecedence), | |
]); | |
next = peekValue(iterator); | |
} | |
const juxtaposition = parent("juxtaposition", [tree]); | |
while (isJuxtaposable(peekValue(iterator), dictionary)) { | |
const exp = expression(dictionary, iterator, 0); | |
if (exp !== null) | |
if (exp.type === "juxtaposition") | |
juxtaposition.children.push(...exp.children); | |
else juxtaposition.children.push(exp); | |
} | |
if (juxtaposition.children.length === 1) return tree; | |
else return juxtaposition; | |
} | |
function prefix(dictionary, iterator) { | |
const next = peekValue(iterator); | |
if (next.type === "operator" && next.value in dictionary.prefix) { | |
const operator = dictionary.prefix[nextValue(iterator).value]; | |
const child = expression(dictionary, iterator, operator.precedence); | |
return hub("operation", operator.value, [child]); | |
} else if (next.type === "delimiter" && next.value in dictionary.delimiters) { | |
const { left, right, value } = | |
dictionary.delimiters[nextValue(iterator).value]; | |
const inner = expression(dictionary, iterator, 0); | |
if (nextValue(iterator).value === right) | |
return hub("group", left + right, [inner]); | |
else throw new SyntaxError(`Unexpected end of group: ${value}`); | |
} else if (next.type === "identifier") { | |
const { value } = nextValue(iterator); | |
return literal("identifier", value); | |
} else if (next.type === "string" || next.type === "number") { | |
const token = nextValue(iterator); | |
const valueType = token.type; | |
const value = JSON.parse(token.value); | |
return { type: "literal", valueType, value }; | |
} else if (next.type === "break") { | |
nextValue(iterator); | |
return null; | |
} | |
throw new SyntaxError( | |
`Unknown token: ${JSON.stringify(next.value)} (${next.type})`, | |
); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2023 Simon Meskens | |
// Licensed under the MIT License | |
// | |
// Permission to use, copy, modify, and/or distribute this software for any | |
// purpose with or without fee is hereby granted, provided this license is | |
// preserved. This software is offered as-is, without any warranty. | |
export function peekable(iterator) { | |
if (iterator.peek) return iterator; | |
let next = iterator.next(); | |
return { | |
next() { | |
const n = next; | |
next = iterator.next(); | |
return n; | |
}, | |
peek: () => next, | |
}; | |
} | |
export function nextValue(iterator) { | |
const { done, value } = iterator.next(); | |
if (!done) return value; | |
else return null; | |
} | |
export function peekValue(iterator) { | |
const { done, value } = iterator.peek(); | |
if (!done) return value; | |
else return null; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2023 Simon Meskens | |
// Licensed under the MIT License | |
// | |
// Permission to use, copy, modify, and/or distribute this software for any | |
// purpose with or without fee is hereby granted, provided this license is | |
// preserved. This software is offered as-is, without any warranty. | |
export const node = (type, position) => | |
position ? { type, position } : { type }; | |
export const literal = (type, value, position) => | |
position ? { type, value, position } : { type, value }; | |
export const parent = (type, children, position) => | |
position ? { type, children, position } : { type, children }; | |
export const hub = (type, value, children, position) => | |
position ? { type, value, children, position } : { type, value, children }; | |
export const point = (line, column, offset) => | |
offset !== undefined ? { line, column, offset } : { line, column }; | |
export const position = (start, end) => { | |
if (Array.isArray(start)) start = point(...start); | |
if (Array.isArray(end)) end = point(...end); | |
return { start, end }; | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment