Skip to content

Instantly share code, notes, and snippets.

@lqt0223
Created November 16, 2023 06:38
Show Gist options
  • Save lqt0223/07c9345ed225d9f877b17159c8e7e4a5 to your computer and use it in GitHub Desktop.
Save lqt0223/07c9345ed225d9f877b17159c8e7e4a5 to your computer and use it in GitHub Desktop.
40 parsing LISP like expression
/*
* a function that parses LISP like expression into AST
* the CFG will be like
* C = ( op args ) --- an expression is enclosed in a parenthesis, with 'op' as head, and 'args' as tail
* args = arg* --- an args (argument list) contains zero or variadic-length arguments
* arg => num | C --- an arg is either a num or an expression
* op = +|-|*|/
* num = [0-9]*
*/
const parse = function(s) {
// tokenizer - i is the character position
const tokens = []
let i = 0
while (i < s.length) {
let c = s[i]
if (c === ' ') {
i++
continue
} else if ('+-*/'.indexOf(c) > -1) {
tokens.push(c)
} else if (c === '(') {
tokens.push(c)
} else if (c === ')') {
tokens.push(c)
} else if ('0123456789'.indexOf(c) > -1) {
let temp = ''
while (1) {
c = s[i]
if ('0123456789'.indexOf(c) === -1) {
i--
break
}
temp += c
i++
}
tokens.push(temp)
}
i++
}
// parser, j is the token position
let j = 0
// see if next token matches some condition, if matched, advance
const next = (pred) => {
const peek = tokens[j]
if (pred(peek)) {
j++
return peek
} else {
return false
}
}
// functions for symbols that is only one token long
const num = () => {
const result = next(a => !Number.isNaN(parseInt(a)))
if (result) {
return { type: 'num', content: parseInt(result) }
} else {
return false
}
}
const op = () => {
let result
result = next(a => a === '+')
if (result) {
return { type: 'op', content: '+'}
}
result = next(a => a === '-')
if (result) {
return { type: 'op', content: '-'}
}
result = next(a => a === '*')
if (result) {
return { type: 'op', content: '*'}
}
result = next(a => a === '/')
if (result) {
return { type: 'op', content: '/'}
}
return false
}
// functions for other terminals and non-terminals
const c = () => {
let result
const _j = j // save the state before parsing this symbol
result = next(a => a === '(')
if (!result) return false // the called method is only "next" by now, no need to backtrack
const opNode = op()
if (!opNode) {
j = _j // the called method maybe "next" and "op", and backtrack is needed if parsing failed
return false
}
const argNodes = []
// use loop for a symbol that is expected repeatedly
while (1) {
const argNode = arg()
if (!argNode) {
break
// return false
}
argNodes.push(argNode)
}
result = next(a => a === ')')
if (!result) {
j = _j
return false
}
return { type: 'call', op: opNode, args: argNodes }
}
const arg = () => {
let result
const _j = j
result = c()
if (result) return result
result = num()
if (result) return result
j = _j
return false
}
return c() // start parsing with the start symbol
};
const s = "(+ (+ 2 3) (- 4) (- 5 6))"
const result = parse(s)
console.log(JSON.stringify(result, null, 2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment