Last active
December 30, 2020 09:54
-
-
Save golopot/633ef34f6f84e8d712655f82d76a2a0f to your computer and use it in GitHub Desktop.
Operator precedence parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* @typedef {{ | |
* source: string, | |
* index: number, | |
* }} Parser | |
*/ | |
/** | |
* @typedef {any} Expression | |
*/ | |
/** | |
* @param {Parser} parser | |
* @returns {string} | |
*/ | |
function getChar(parser) { | |
return parser.source[parser.index]; | |
} | |
/** | |
* @param {Parser} parser | |
* @param {RegExp} regexp | |
* @returns {string | undefined} | |
*/ | |
function matchRegexp(parser, regexp) { | |
const exec = regexp.exec(parser.source.slice(parser.index)); | |
if (exec !== null) { | |
return exec[0]; | |
} | |
return undefined; | |
} | |
/** | |
* @param {Parser} parser | |
*/ | |
function skipSpaces(parser) { | |
while (parser.index < parser.source.length) { | |
switch (parser.source[parser.index]) { | |
case ' ': | |
case '\n': | |
case '\r': | |
case '\t': | |
parser.index += 1; | |
continue; | |
default: | |
return; | |
} | |
} | |
} | |
/** | |
* @param {Parser} parser | |
* @returns {string | undefined} | |
*/ | |
function getToken(parser) { | |
skipSpaces(parser); | |
const c = getChar(parser); | |
switch (c) { | |
case '+': | |
case '-': | |
case '*': | |
case '-': | |
case '.': | |
case '!': | |
case '(': | |
case ')': | |
return c; | |
default: | |
} | |
const identifier = matchRegexp(parser, /^[a-zA-Z]+/); | |
if (identifier !== undefined) { | |
return identifier; | |
} | |
const numberLiteral = matchRegexp(parser, /^[0-9]+/); | |
if (numberLiteral !== undefined) { | |
return numberLiteral; | |
} | |
return undefined; | |
} | |
/** | |
* @param {Parser} parser | |
* @param {string} token | |
*/ | |
function consumeToken(parser, token) { | |
const actualToken = getToken(parser); | |
if (actualToken === token) { | |
parser.index += token.length; | |
return; | |
} | |
throw new Error(`Expected \`${token}\`.`); | |
} | |
/** | |
* @param {Parser} parser | |
* @returns {any} | |
*/ | |
function parsePrimaryExpression(parser) { | |
const token = getToken(parser); | |
if (token === undefined) { | |
return undefined; | |
} | |
switch (token) { | |
case '!': { | |
parser.index += 1; | |
const arg = parsePrimaryExpression(parser); | |
return { | |
type: 'UnaryExpression', | |
operator: '!', | |
prefix: true, | |
argument: arg, | |
}; | |
} | |
case '(': { | |
parser.index += 1; | |
const expression = parseExpression(parser, 0); | |
consumeToken(parser, ')'); | |
return { | |
type: 'ParenthesisExpression', | |
expression: expression, | |
}; | |
} | |
} | |
if (!(/^[a-zA-Z]+$/.test(token) || !/^[0-9]+$/.test(token))) { | |
throw new Error('Expecting an identifier or a number literal.'); | |
} | |
parser.index += token.length; | |
return token; | |
} | |
const operators = new Map([ | |
['*', 3], | |
['/', 3], | |
['+', 2], | |
['-', 2], | |
['>', 1], | |
['<', 1], | |
]); | |
/** | |
* @param {string} operator | |
* @returns {number} | |
*/ | |
function getOperatorPrecedence(operator) { | |
const p = operators.get(operator); | |
if (p === undefined) { | |
throw new Error(`Cannot find precedence for operator \`${operator}\`.`); | |
} | |
return p; | |
} | |
/** | |
* @param {Parser} parser | |
* @returns {boolean} | |
*/ | |
function isEOF(parser) { | |
return parser.index === parser.source.length; | |
} | |
/** | |
* | |
* @param {Expression} left | |
* @param {Expression} right | |
* @param {string} operator | |
* @returns {Expression} | |
*/ | |
function makeBinaryExpressionNode(operator, left, right) { | |
return { | |
type: 'BinaryExpression', | |
operator: operator, | |
left: left, | |
right: right, | |
}; | |
} | |
/** | |
* @param {Parser} parser | |
* @param {number} minPrecedence | |
*/ | |
function consumeOperatorWithMinPrecedence(parser, minPrecedence) { | |
if (isEOF(parser)) { | |
return undefined; | |
} | |
const token = getToken(parser); | |
if (token === undefined || !operators.has(token)) { | |
return undefined; | |
} | |
if (getOperatorPrecedence(token) > minPrecedence) { | |
parser.index += token.length; | |
return token; | |
} | |
return undefined; | |
} | |
/** | |
* @param {Parser} parser | |
* @param {number} minPrecedence | |
* @returns {Expression} | |
*/ | |
function parseExpression(parser, minPrecedence) { | |
let node = parsePrimaryExpression(parser); | |
for ( | |
let operator = consumeOperatorWithMinPrecedence(parser, minPrecedence); | |
operator !== undefined; | |
operator = consumeOperatorWithMinPrecedence(parser, minPrecedence) | |
) { | |
let right = parseExpression(parser, getOperatorPrecedence(operator)); | |
node = makeBinaryExpressionNode(operator, node, right); | |
} | |
return node; | |
} | |
/** | |
* @param {string} source | |
*/ | |
function parse(source) { | |
const parser = { | |
source: source, | |
index: 0, | |
}; | |
return parseExpression(parser, 0); | |
} | |
function main() { | |
const ast = parse('a * b + c'); | |
console.log(ast); | |
} | |
main(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is an example of operator precedence parser implemented in Javascript. The core part of the algorithm is at L212-L225.