Created
November 13, 2023 18:49
-
-
Save zoren/54562f1a3903d19ac29c04a4865d99b7 to your computer and use it in GitHub Desktop.
A parser for a language that has only parentheses, whitespace, and words
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// this is a parser for a language described in this tweet: https://twitter.com/msimoni/status/1721647625294782972
const LPAR = 40 // code point of '('
const RPAR = 41 // code point of ')'

// A code point is part of a word when it is greater than 32 (space) and is
// not a parenthesis. Everything at or below 32 only separates words.
const isWord = cp => 32 < cp && cp !== LPAR && cp !== RPAR

/**
 * Parses a string made of parentheses, separators and words into nested arrays.
 *
 * Each word becomes a string and each parenthesised group becomes an array
 * holding its words and sub-groups, in input order. Parsing never throws on
 * unbalanced input; problems are reported through `warnings` instead:
 * - `{ type: 'extraClosingParen', index }` for a `)` with no open group. The
 *   paren is ignored. `index` counts code points from the start of the input
 *   (not UTF-16 code units).
 * - `{ type: 'unclosedParens', number }` when the input ends with `number`
 *   groups still open. Those groups are kept in `forms` as they are.
 *
 * @param {string} inputString - the text to parse
 * @returns {{ forms: Array, warnings: Array<object> }} top-level forms and warnings
 */
export const parse = inputString => {
  const warnings = []
  const topLevelArray = []
  let currentArray = topLevelArray
  const stack = [currentArray]
  // UTF-16 offset where the word being read started, or -1 when not in a word.
  // Words are sliced out of the input rather than rebuilt with
  // String.fromCodePoint(...codePoints): spreading a huge array into a call
  // overflows the argument limit and throws a RangeError on very long words.
  let wordStart = -1
  // UTF-16 offset of the current character, used for slicing
  let offset = 0
  // code point index of the current character, reported in warnings
  let i = 0
  for (const character of inputString) {
    const codePoint = character.codePointAt(0)
    if (isWord(codePoint)) {
      if (wordStart === -1) wordStart = offset
    } else {
      // a separator or parenthesis ends the word in progress, emit it first
      if (wordStart !== -1) {
        currentArray.push(inputString.slice(wordStart, offset))
        wordStart = -1
      }
      if (codePoint === LPAR) {
        const newArray = []
        currentArray.push(newArray)
        stack.push(newArray)
        currentArray = newArray
      } else if (codePoint === RPAR) {
        // only pop if we have something to pop, this allows for extra closing parens
        if (stack.length === 1) {
          warnings.push({ type: 'extraClosingParen', index: i })
        } else {
          stack.pop()
          currentArray = stack.at(-1)
        }
      }
    }
    offset += character.length
    i++
  }
  // the input may end in the middle of a word
  if (wordStart !== -1) currentArray.push(inputString.slice(wordStart))
  if (stack.length !== 1) {
    warnings.push({ type: 'unclosedParens', number: stack.length - 1 })
  }
  return { forms: topLevelArray, warnings }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment