Last active
February 17, 2022 16:24
-
-
Save ToJans/7edd74935025fd03c2735656e1d45669 to your computer and use it in GitHub Desktop.
A minimalistic example of tokenizing and parsing JavaScript.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Ordered table of token matchers. Every pattern is anchored with `^` and is
 * tried against the start of the remaining input in declaration order; the
 * first pattern that yields a non-empty match decides the token type, so more
 * specific entries must stay above more general ones.
 */
const TokenExpressions = {
  // A `//` line comment (possibly empty) or a lazily matched `/* ... */` block
  // comment; the lazy quantifier lets the body contain `(`, `)`, `*` and `/`.
  comment: /^(\/\/[^\n]*|\/\*[\s\S]*?\*\/)/,
  // Double-quoted string. A backslash always consumes the character after it,
  // so `"a\\"` ends at its own closing quote instead of running on.
  // Single-quoted strings are not recognised.
  strings: /^"(\\[\s\S]|[^"\\])*"/,
  // Backtick-delimited template text (may be empty); `${}` parts are not split out.
  multiline_strings: /^`(\\[\s\S]|[^`\\])*`/,
  whitespace: /^\s+/,
  brace: /^[()]/,
  curlyBrace: /^[{}]/,
  array: /^[\[\]]/,
  comma: /^,/,
  // Two-character operators come first so they are not split into `>` and `=`.
  operator: /^(>=|<=|[-+*\/=><&|%!^])/,
  dot: /^\./,
  // Requires at least one digit, so the pattern can never match the empty string.
  number: /^\d+(\.\d+)?/,
  // The `\n` alternative is shadowed by `whitespace` above; only `;` gets here.
  endOfStatement: /^(;|\n)/,
  identifier: /^\w+/,
  // Catch-all for any other single non-space, non-word character.
  token: /^[^\s\w]/
};

/** Name of a matcher in `TokenExpressions`. */
type TokenType = keyof typeof TokenExpressions;
/** A lexed token: `[type, matched text]`. */
type Token = [TokenType, string];
/** A token, or a bracketed group holding the opener, its contents and the closer. */
type ParseNodeItem = Token | ["group", ParseNodeItem[]];
/**
 * Lazily splits `src` into `[type, text]` tokens.
 *
 * At each position the matchers in `TokenExpressions` are tried in declaration
 * order and the first non-empty match wins; its text is consumed and yielded.
 *
 * @param src Source text to tokenize.
 * @returns A generator yielding tokens in source order.
 * @throws A string (not an `Error`; kept that way for existing callers) whose
 *   text includes up to the first 30 characters of the input that no matcher
 *   accepted.
 */
function* tokenize(src: string): Generator<Token, void, unknown> {
  /* some intermediate comment
  blah
  */
  // do all kinds of stuff: & a+2
  // The two comments above are sample input, not notes: the demo in this file
  // tokenizes the text of this very function, so they exercise the comment rules.
  const kinds = Object.keys(TokenExpressions) as TokenType[];
  let rest = src;
  while (rest.length) {
    let item: Token | null = null;
    for (const kind of kinds) {
      const res = rest.match(TokenExpressions[kind]);
      // Skip empty matches, otherwise the loop would never advance.
      if (res && res[0].length) {
        item = [kind, res[0]];
        break;
      }
    }
    if (!item) {
      // Show the start of the unparsed input, truncated to 30 characters.
      const preview = rest.length > 30 ? rest.slice(0, 30) + "..." : rest;
      throw `unable to parse ****\n${preview}\n***`;
    }
    rest = rest.slice(item[1].length);
    yield item;
  }
}
/**
 * Nests a flat token stream by bracket pairs.
 *
 * Each opening `(`, `[` or `{` starts a `["group", items]` node whose items
 * begin with the opener and end with the closer that terminated it. Bracket
 * kinds are not checked against each other: any closer ends the innermost
 * open group.
 *
 * Malformed input is tolerated: a closer with no open group stays in the
 * current list as a plain token, and groups still open at the end of input
 * are left unterminated. The root list is always returned.
 *
 * @param tokens Tokens in source order.
 * @returns The top-level list of tokens and groups.
 */
function parse(tokens: Iterable<[TokenType, string]>): ParseNodeItem[] {
  const openers = new Set(["(", "[", "{"]);
  const root: ParseNodeItem[] = [];
  // Lists that enclose `node`, outermost first.
  const stack: ParseNodeItem[][] = [];
  let node = root;

  for (const token of tokens) {
    const [kind, text] = token;
    const isBracket = kind === "array" || kind === "brace" || kind === "curlyBrace";
    if (!isBracket) {
      node.push(token);
      continue;
    }

    if (openers.has(text.charAt(0))) {
      // Open a nested group; the opener is its first item.
      const group: ParseNodeItem[] = [token];
      node.push(["group", group]);
      stack.push(node);
      node = group;
      continue;
    }

    // A closer is the last item of the group it ends.
    node.push(token);
    const parent = stack.pop();
    if (parent) {
      node = parent;
    }
    // With no parent this was an unmatched closer: stay at the root.
  }

  return root;
}
/** Escapes the characters that are significant in HTML text and attribute values. */
function escapeHTML(text: string): string {
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

/**
 * Renders a parse tree as nested `<span>` elements for visual debugging.
 *
 * Every item becomes `<span class="TYPE">…</span>`. A group additionally wraps
 * its rendered children in an inner `<span class="group">`, so each bracket
 * level adds two levels of span nesting.
 *
 * Token text is HTML-escaped: the output is meant to be assigned to
 * `innerHTML`, and tokenized source routinely contains `<`, `>` and `&`.
 *
 * @param node Items to render, as produced by `parse`.
 * @returns The HTML markup; an empty string for an empty list.
 */
function debugHTML(node: ParseNodeItem[]): string {
  const parts: string[] = [];
  for (const item of node) {
    const content =
      item[0] === "group"
        ? `<span class="group">${debugHTML(item[1])}</span>`
        : escapeHTML(item[1]);
    parts.push(`<span class="${escapeHTML(item[0])}">${content}</span>`);
  }
  return parts.join("");
}
// Demo: tokenize the tokenizer's own source text, parse it once, and reuse the
// result for both the console dump and the HTML rendering.
// A spread literal is used instead of `new Array(...items)`, which builds a
// sparse array when handed a single numeric argument.
const tokenized = [...tokenize(tokenize.toString())];
const parsed = parse(tokenized);
console.log(parsed);

// Stylesheet for the debug view. Rules that were repeated with the same
// selector and then overridden by a later copy have been dropped; the
// effective styling is unchanged. Background colours vary with span nesting
// depth so that groups stand out.
const $style = document.createElement("style");
$style.setAttribute("type", "text/css");
$style.innerHTML = `
  .root span {
    margin: 0em;
    padding: 0.1em;
    display: inline;
    line-height: 2;
    white-space: pre;
    font-family: Arial, Helvetica, sans-serif;
  }
  .identifier {
    color: #456789;
  }
  .brace, .curlyBrace, .array {
    color: #789456;
  }
  .operator {
    font-weight: bold;
  }
  .comment {
    color: #999;
    display: inline-block;
  }
  .multiline_strings {
    font-style: italic;
    font-weight: bold;
  }
  .group {
    display: inline-block;
    border: 1px gray dashed;
    padding: 1em;
    margin: 1em;
  }
  .group:first-child > span:first-child,
  .group:first-child > span:last-child
  {
    color: #783241;
    font-weight: bold;
  }
  .root > span {
    background-color: #eef;
  }
  .root > span > span {
    background-color: #fef;
  }
  .root > span > span > span {
    background-color: #ffe;
  }
  .root > span > span > span > span {
    background-color: #fee;
  }
  .root > span > span > span > span > span {
    background-color: #ccf;
  }
  .root > span > span > span > span > span > span {
    background-color: #fcc;
  }
  .root > span > span > span > span > span > span > span {
    background-color: #cfc;
  }
  .root > span > span > span > span > span > span > span > span {
    background-color: #fcf;
  }
  .root > span > span > span > span > span > span > span > span > span {
    background-color: #9ff;
  }
  .root > span > span > span > span > span > span > span > span > span > span {
    background-color: #ff9;
  }
  .root > span > span > span > span > span > span > span > span > span > span > span {
    background-color: #f9f;
  }
`;
//document.head.append($style);

// Container for the rendered tree; `.root` scopes the rules above.
const $div = document.createElement("div");
$div.classList.add("root");
const renderedHTML = debugHTML(parsed);
$div.innerHTML = renderedHTML;
//document.body.append($div);
console.log(renderedHTML);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is the output it generates:
