Created
January 8, 2024 13:21
-
-
Save wernsey/eb1be08a75b5b57df811265aeb4712a6 to your computer and use it in GitHub Desktop.
Example of a Pratt parser for mathematical expressions in C99
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Example of a Pratt parser for mathematical expressions in C99. | |
Done mostly to get my head around the concepts | |
* [Pratt Parsers: Expression Parsing Made Easy ](https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/) by Munificent | |
* He also talks about it in [chapter 17 of Crafting Interpreters](https://craftinginterpreters.com/compiling-expressions.html#a-pratt-parser) | |
* <https://dev.to/jrop/pratt-parsing> - has a lot of pseudo code and uses | |
the same terminology that Pratt used in his original paper. | |
* <https://langdev.stackexchange.com/a/3275> - very thorough explanation. | |
Would upvote if I was signed in | |
* [Demystifying Pratt Parsers](https://martin.janiczek.cz/2023/07/03/demystifying-pratt-parsers.html) | |
Example code in Elm, but nice diagrams | |
* [Simple but Powerful Pratt Parsing](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html) | |
- in Rust, but I wont hold that against it. | |
* [On Recursive Descent and Pratt Parsing](https://chidiwilliams.com/posts/on-recursive-descent-and-pratt-parsing) | |
also has a concise implementation | |
* [Top-Down operator precedence (Pratt) parsing](https://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing) | |
on Eli Bendersky's blog | |
* The Fredrik Lundh article he cites is now here: | |
[Simple Top-Down Parsing in Python](https://11l-lang.org/archive/simple-top-down-parsing/) | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <ctype.h> | |
#include <math.h> | |
#include <assert.h> | |
typedef double number_t; | |
typedef struct { | |
const char *text; | |
int i; | |
number_t number; | |
} context_t; | |
number_t expression(context_t *,int); | |
typedef enum { | |
END, | |
NUMBER, | |
PLUS, | |
MINUS, | |
MUL, | |
DIV, | |
EXP, | |
OPEN_PAREN, CLOSE_PAREN, | |
} token_t; | |
typedef number_t (*nud_t)(context_t *, token_t); | |
typedef number_t (*led_t)(context_t *, number_t, token_t); | |
token_t tokenize(const char *text, unsigned int *ip, number_t *number); | |
int prec(token_t); | |
void error(const char *text, int i, const char *e) { | |
fprintf(stderr, "error: %s\n", e); | |
fprintf(stderr, " %s\n", text); | |
fprintf(stderr, " %*c\n", i, '^'); | |
abort(); | |
} | |
token_t Token; | |
token_t next_token(context_t *C) { | |
Token = tokenize(C->text, &C->i, &C->number); | |
return Token; | |
} | |
typedef struct { | |
int lbp; | |
nud_t nud; | |
led_t led; | |
} operator_t; | |
number_t nud_number(context_t *C, token_t token) { | |
return C->number; | |
} | |
number_t nud_negate(context_t *C, token_t token) { | |
return -expression(C, 100); | |
} | |
number_t nud_parens(context_t *C, token_t token) { | |
number_t result = expression(C, 0); | |
if(Token != CLOSE_PAREN) | |
error(C->text, C->i, "')' expected"); | |
next_token(C); | |
return result; | |
} | |
number_t binop(context_t *C, number_t left, token_t token) { | |
switch(token) { | |
case MINUS: return left - expression(C, prec(token)); | |
case PLUS: return left + expression(C, prec(token)); | |
case MUL: return left * expression(C, prec(token)); | |
case DIV: return left / expression(C, prec(token)); | |
case EXP: return pow(left, expression(C, prec(token)-1)); | |
default: error(C->text, C->i, "operator??"); | |
} | |
} | |
static const operator_t operators[] = { | |
[END] = {}, | |
[NUMBER] = { .nud = nud_number}, | |
[MINUS] = {.lbp = 10, .nud = nud_negate, .led = binop}, | |
[PLUS] = {.lbp = 10, .led = binop}, | |
[MUL] = {.lbp = 20, .led = binop}, | |
[DIV] = {.lbp = 20, .led = binop}, | |
[EXP] = {.lbp = 30, .led = binop}, | |
[OPEN_PAREN] = {.nud = nud_parens}, | |
[CLOSE_PAREN] = {}, | |
}; | |
int prec(token_t t) { | |
return operators[t].lbp; | |
} | |
number_t expression(context_t *C, int rbp) { | |
token_t token = Token; | |
next_token(C); | |
if(token == END) | |
error(C->text, C->i, "unexpected end of expression"); | |
nud_t nud = operators[token].nud; | |
if(!nud) | |
error(C->text, C->i, "expression expected"); | |
number_t left = nud(C, token); | |
//printf("%*cnud %d left:%g (lbp:%d, rbp:%d)\n", rbp, ' ', token, left, operators[token].lbp, rbp); | |
while(rbp < operators[Token].lbp) { | |
token = Token; | |
next_token(C); | |
//printf("%*cled %d left:%g (lbp:%d, rbp:%d)\n", rbp, ' ', token, left, operators[token].lbp, rbp); | |
assert(operators[token].led); | |
left = operators[token].led(C, left, token); | |
} | |
return left; | |
} | |
number_t parse(const char *text) { | |
context_t C = {.text = text}; | |
next_token(&C); | |
return expression(&C, 0); | |
} | |
token_t tokenize(const char *text, unsigned int *ip, number_t *number) { | |
while(isspace(text[*ip])) | |
(*ip)++; | |
if(!text[*ip]) return END; | |
if(isdigit(text[*ip])) { | |
char token[64]; | |
int n = 0; | |
while(isdigit(text[*ip]) || text[*ip] == '.') { | |
if(n == sizeof token - 1) | |
error(text, *ip, "token too long"); | |
token[n++] = text[(*ip)++]; | |
} | |
token[n] = '\0'; | |
if(number) | |
*number = atof(token); | |
return NUMBER; | |
} | |
switch(text[(*ip)++]) { | |
case '+': return PLUS; | |
case '-': return MINUS; | |
case '*': return MUL; | |
case '/': return DIV; | |
case '(': return OPEN_PAREN; | |
case ')': return CLOSE_PAREN; | |
case '^': return EXP; | |
default: error(text, *ip, "Unrecogniseed token"); | |
} | |
return END; | |
} | |
int main(int argc, char *argv[]) { | |
const char *in; | |
if(argc > 1) | |
in = argv[1]; | |
else | |
in = "5+5"; | |
printf("in: %s\n", in); | |
#if 0 | |
int i = 0; | |
token_t t; | |
do { | |
number_t n; | |
t = tokenize(in, &i, &n); | |
if(t == NUMBER) | |
printf("%d (%g)\n", t, n); | |
else | |
printf("%d\n", t); | |
} while(t != END); | |
printf("------------------\n"); | |
#endif | |
number_t result = parse(in); | |
printf("result = %g\n", result); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment