Created
December 14, 2012 04:37
-
-
Save raskasa/4282723 to your computer and use it in GitHub Desktop.
An example of the parsing component of a compiler for reference.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
/* constants */
/*
 * Token code for an integer literal.  Operators and brackets are returned
 * by getToken() as their own character code, so NUM is chosen above the
 * range of single-character codes (assuming 8-bit char) to avoid a clash.
 */
#define NUM 257

/* global */
/* One-token lookahead: the token the parser is currently looking at
 * (a character code, NUM, or EOF once the input is exhausted). */
int currentToken;
/* Numeric value of the most recently scanned NUM token. */
int currentAttribute;

/* Prototypes */
/* Every function that is called before its definition needs a prototype;
 * implicit function declarations are not valid C since C99. */
int getToken(void);
int expr(void);
int term(void);
int factor(void);
void error(char *message);
void match(int expectedToken);
/* MAIN | |
* This is the initial implementation of a tokenizer | |
* used when designing and implementing a compiler. It | |
* tokenizes input from stdin (lexical analysis), and parses | |
* it to determines if the input is syntactically correct | |
* (grammar analysis). It then, evaluta | |
*/ | |
main () { | |
int c, value; | |
/* initialized currentToken */ | |
currentToken = getToken(); | |
/* call parser */ | |
value = expr(); | |
/* output value */ | |
fprintf(stderr, "\n\nValue = %d\n", value); | |
} | |
/* | |
* expr -> term expr' | |
* expr' -> '+' term expr' | empty | |
*/ | |
int expr() { | |
int value = term(); | |
while(1) { | |
if ( currentToken == '+' ) { | |
match('+'); | |
value += term(); /* this gives meaning to the input '+' symbol */ | |
} else return value; | |
} | |
} | |
/* term -> factor term' | |
* term' -> '*' factor term' | empty | |
*/ | |
int term() { | |
int value = factor(); | |
while(1) { | |
if ( currentToken == '*' ) { | |
match('*'); | |
value *= factor(); | |
} else return value; | |
} | |
} | |
/* | |
* factor -> '(' expr ')' | NUM | |
*/ | |
int factor() { | |
int value; | |
if ( currentToken == '(') { | |
match('('); | |
value = expr(); | |
match(')'); | |
return value; | |
} else if ( currentToken == NUM ) { | |
value = currentAttribute; | |
match(NUM); | |
return value; | |
} else error("Factor: expecting ( or NUM"); | |
} | |
/* tokenizer or scanner: | |
* returns the next token | |
*/ | |
int getToken() { | |
int c, value; | |
while ( (c = getchar()) != EOF ) { // EOF is an integer, so you need an int to catch it | |
switch(c) { | |
case '+': | |
case '*': | |
case '(': | |
case ')': | |
fprintf(stderr, "[%c]", c); | |
return c; /* return operators and brackets as is */ | |
case ' ': | |
case '\t': | |
case '\n': | |
fprintf(stderr, "%c", c); | |
continue; | |
default: | |
if ( isdigit(c) ) { | |
value = 0; | |
do { | |
value = value*10 + (c - '0'); /* used for converting ASCII to int) */ | |
} while(isdigit( c = getchar()) ); | |
ungetc(c, stdin); | |
currentAttribute = value; | |
fprintf(stderr, "[NUM:%d]", value); | |
return NUM; | |
} else { | |
error("getToken: unknown character"); | |
} | |
} | |
} | |
return c; | |
} | |
void match(int expectedToken) { | |
if ( currentToken == expectedToken ) { | |
currentToken = getToken(); | |
} else { | |
error("Match: unexpected token"); | |
} | |
} | |
/*
 * Prints "Error: <message>" followed by a newline to stderr and terminates
 * the program with exit status 1.  Does not return.
 */
void error(char *message) {
    fprintf(stderr, "Error: %s\n", message);
    exit(1);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment