Skip to content

Instantly share code, notes, and snippets.

@raffecat
Created April 27, 2024 04:55
Show Gist options
  • Save raffecat/3401e9f50dc85bbfcba07e4d752eb4fb to your computer and use it in GitHub Desktop.
Save raffecat/3401e9f50dc85bbfcba07e4d752eb4fb to your computer and use it in GitHub Desktop.
Pratt parser practical example
// Pratt parser practical example.
var source = "c^2 > 1-2+3 + d(4) * -(a+1) + 8 * -5^4^3^2 + 1";
var pos = 0;
var unary_precedence = 4
var precedence = {
'=': 1, // equality
'<': 1,
'>': 1,
'+': 2,
'-': 2,
'*': 3,
'/': 3,
// unary precedence = 4
'^': 5, // raise to power
'(': 6,
}
// lexical analyzer: a real parser would return a token enum here;
// this example uses one-char strings to make the code shorter.
function token() {
for (;;) {
var tok = source.charAt(pos++); // next character
if (tok == ' ') continue; // skip spaces
return tok;
}
}
function unget() { pos-- } // undo last token() call (can avoid using "current token" state)
// primary: numbers, var names, grouping, unary operators (like -x)
function primary() {
var tok = token();
switch (tok) {
case '(': // grouping e.g. (a+1)
var e = expr(0); // pass 0 to parse "whole expression" within parentheses
if (token() != ')') throw "missing closing )";
return e;
case '-': return { op:'neg', arg:expr(unary_precedence) };
case '!': return { op:'not', arg:expr(unary_precedence) };
default:
if (/[a-z]/.test(tok)) return { op:'var', name:tok }; // regexp letters
if (/[0-9]/.test(tok)) return { op:'num', value:tok }; // regexp digits
throw "syntax error: "+source.substr(pos-1)+" at "+pos;
}
}
function expr(minPrec) {
var left = primary();
// binary operators like a+b
// loop continues while next operator has higher precedence than caller
// this loop is the key to pratt parsing
while (precedence[tok=token()] > minPrec) { // note: assigns tok
switch (tok) {
case '=': left = { op:'eq', left, right:expr(precedence[tok]) }; break;
case '<': left = { op:'lt', left, right:expr(precedence[tok]) }; break;
case '>': left = { op:'gt', left, right:expr(precedence[tok]) }; break;
case '+': left = { op:'add', left, right:expr(precedence[tok]) }; break;
case '-': left = { op:'sub', left, right:expr(precedence[tok]) }; break;
case '*': left = { op:'mul', left, right:expr(precedence[tok]) }; break;
case '/': left = { op:'div', left, right:expr(precedence[tok]) }; break;
case '^': left = { op:'pow', left, right:expr(precedence[tok]-1) }; break; // pass P-1 for right-associative
case '(': // function call
left = { op:'call', left, right:expr(0) }; // pass 0 to parse "whole expression" within argument list
if (token() != ')') throw "expecting ')' after call argument: "+source.substr(pos-1)+" at "+pos;
break;
default: throw "missing code for operator: "+tok;
}
}
unget(); // loop test above consumed an extra token
return left;
}
function parse() {
// always call expr(0) to parse an expression
var e = expr(0);
// expect end of source here (a larger language with embedded expressions
// won't need this check; enclosing syntax will catch errors)
if (token() != '') throw "syntax error: "+source.substr(pos-1)+" at "+pos;
// now print out the source and the parse tree
console.log(source);
console.log(dumptree(e,""));
}
function dumptree(e,i){var k,s="{ "+e.op;for(k in e)if(k!='op')s+=" "+(typeof
e[k]==='object'?"\n "+i+dumptree(e[k],i+" "):String(e[k]));return s+" }"}
parse();
@raffecat
Copy link
Author

Works with node.js or paste into a browser console

@raffecat
Copy link
Author

The output:

c^2 > 1-2+3 + d(4) * -(a+1) + 8 * -5^4^3^2 + 1
{ gt 
  { pow 
    { var c } 
    { num 2 } } 
  { add 
    { add 
      { add 
        { add 
          { sub 
            { num 1 } 
            { num 2 } } 
          { num 3 } } 
        { mul 
          { call 
            { var d } 
            { num 4 } } 
          { neg 
            { add 
              { var a } 
              { num 1 } } } } } 
      { mul 
        { num 8 } 
        { neg 
          { pow 
            { num 5 } 
            { pow 
              { num 4 } 
              { pow 
                { num 3 } 
                { num 2 } } } } } } } 
    { num 1 } } }

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment