Created
July 18, 2014 17:31
-
-
Save rondorkerin/cb8c5ad6db72adc94658 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
PL0 Compiler | |
By: Stephen Bryant | |
Last update: | |
October 24, 2010 | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#define MAX_IDENTIFIER_LENGTH 11 | |
#define MAX_NUMBER_LENGTH 5 | |
#define NUM_RESERVED_WORDS 15 | |
#define NUM_SPECIAL_SYMBOLS 14 | |
#define MAX_TABLE_SIZE 256 | |
#define MAX_LEXEME_SIZE 1000 | |
#define MAX_STACK_HEIGHT 2000 | |
#define MAX_LEXI_LEVELS 3 | |
#define NUMBER_TOKEN_TYPES 36 | |
#define MAX_CODE_LENGTH 500 | |
typedef enum { //p-machine operation enumerations | |
LIT = 1, OPR = 2, LOD = 3, STO = 4, CAL = 5, INC = 6, | |
JMP = 7, JPC = 8, WRT = 9, SIO = 10} operation; | |
typedef enum { //tokens used in scanner | |
nul = 1,nulsym, identsym, numbersym, plussym, minussym, | |
multsym, slashsym, oddsym, eqsym, neqsym, lessym, leqsym, | |
gtrsym, geqsym, lparentsym, rparentsym, commasym, semicolonsym, | |
periodsym, becomessym, beginsym, endsym, ifsym, thensym, | |
whilesym, dosym, callsym, constsym, varsym, procsym, outsym, | |
insym , elsesym, colonsym, exclamsym } token_type; | |
//maps token names | |
char* token_names[NUMBER_TOKEN_TYPES] = {"","nulsym", "identsym", "numbersym", "plussym", "minussym", //1-6 | |
"multsym", "slashsym", "oddsym", "eqsym", "neqsym", "lessym", "leqsym", //7-13 | |
"gtrsym", "geqsym", "lparentsym", "rparentsym", "commasym", "semicolonsym", //14-19 | |
"periodsym", "becomessym", "beginsym", "endsym", "ifsym", "thensym", //20-25 | |
"whilesym", "dosym", "callsym", "constsym", "varsym", "procsym", "outsym", //26-32 | |
"insym" , "elsesym", "colonsym", "exclamsym"}; //33-36 | |
typedef struct //used in the scanner | |
{ | |
token_type id; //id of token | |
char name[MAX_IDENTIFIER_LENGTH]; //name of token | |
int index; //index in token array | |
} token; | |
typedef struct //used in the p-machine | |
{ | |
operation OP; | |
int L; | |
int M; | |
} instruction; | |
typedef struct //used in the parser | |
{ | |
int kind; //const = 1, var = 2, proc = 3 | |
char name[MAX_IDENTIFIER_LENGTH]; | |
char val[MAX_NUMBER_LENGTH]; //number (ascii value) | |
int level; //L level | |
int addr; //M addr | |
} symbol; | |
/* | |
Scanner stuff | |
*/ | |
//reserved word names | |
char* reservedWords[] = {"null","call","begin","end","if","then","else","while","do","in","out", "odd","const","procedure", "var"}; | |
//internal representation of reserved words | |
token_type wsym[] = {nulsym,callsym, beginsym, endsym, ifsym, thensym, elsesym, whilesym, dosym, insym, outsym, oddsym, constsym,procsym,varsym}; | |
//special symbol names | |
char specialSymbols[] = {'+','-','*','/','(',')','=',',','.','<','>',';',':','!'}; | |
//internal representation of special symbols | |
token_type ssym[] = {plussym,minussym,multsym,slashsym,lparentsym,rparentsym,eqsym,commasym,periodsym,lessym,gtrsym,semicolonsym,colonsym,exclamsym}; | |
token tokenList[MAX_TABLE_SIZE]; | |
int crntTokenListIndex = 0; //current index in token list | |
int tokenListSize = 0; | |
/* | |
Parser Stuff | |
*/ | |
symbol symbol_table[MAX_TABLE_SIZE]; | |
symbol crntSymbol; //current symbol. | |
token_type crntToken; //current token | |
int crntLevel = 0; //current L level | |
int symbolTableSize = 0; | |
int tempMaddr = 0; //temp M used for calculating M addressess | |
/* | |
Codegen stuff | |
*/ | |
int tempLvalue; | |
int mainLineNumber = 0; //the line number of our main method | |
int mainFlag = 0; //basically, when i set this to 1, it means we've already changed the main function's address and we don't need to do it anymore. | |
/* | |
P-machine stuff | |
*/ | |
int stack[MAX_STACK_HEIGHT]; | |
int SP = 0, BP = 1, PC = 0; //stack pointer, base pointer, program counter | |
instruction* IR; //instruction register | |
instruction codeBase[MAX_CODE_LENGTH]; //the code | |
int codeBaseSize; //current size of the code base | |
FILE* inputFile; | |
FILE* outputFile; | |
/* | |
basic function declarations | |
*/ | |
void scan(); | |
void parse(); | |
token_type isReservedWord(char* tempIdentifier); | |
token_type isSpecialSymbol(char temp); | |
symbol findSymbol(char* name, int level); | |
void printSymbolTable(); | |
void insertSymbol(); | |
void addConstantNumber(int temp); | |
void gettok(); | |
int accept(token_type s); | |
void error(int num); | |
void program(); | |
void block(); | |
void statement(); | |
void condition(); | |
void expression(); | |
void term(); | |
void factor(); | |
void codeGen(operation opr, int M, int L); | |
void changeMvalue(int address, int M); | |
void printCodeBase(); | |
void printCodeLine(int i); | |
void printTopOfStack(); | |
void printStack(int tempBP,int tempSP); | |
void inputToStack(); | |
void fetch(); | |
void execute(); | |
void pmachine(); | |
/* | |
Program driving function | |
*/ | |
int main() | |
{ | |
int i; | |
printf("scanning:\n"); | |
scan(); | |
for (i = 0; i < tokenListSize; i++) | |
{ | |
token x = tokenList[i]; | |
printf("%d\n", x.id); | |
} | |
printf("parsing:\n"); | |
// | |
parse(); | |
printf("symbol table:\n"); | |
// | |
for (i = 0; i < symbolTableSize; i++) | |
{ | |
printf("name %s kind %d level %d addr %d val %s\n",symbol_table[i].name,symbol_table[i].kind,symbol_table[i].level,symbol_table[i].addr,symbol_table[i].val); | |
} | |
printf("printing code base:\n"); | |
// | |
printCodeBase(); | |
printf("Executing code in p machine!\n"); | |
// | |
pmachine(); | |
printf("success!\n"); | |
// | |
} | |
/* | |
Populates tokenList with tokens from the program. | |
*/ | |
void scan() | |
{ | |
inputFile = fopen("programinput.txt","r"); | |
outputFile = fopen("lexoutput.txt","w"); | |
char temp; | |
int i; //counter to make sure identifiers and numbers are correct length | |
while ((temp = fgetc(inputFile)) != EOF) //Token reading loop | |
{ | |
token t; | |
token_type type; | |
i = 0; | |
if (isalpha(temp)) | |
{ | |
char tempIdentifier[MAX_IDENTIFIER_LENGTH]; | |
do //read characters into identifier | |
{ | |
tempIdentifier[i] = temp; | |
i++; | |
temp = fgetc(inputFile); | |
} while (isalnum(temp) && temp != EOF); | |
if (i > MAX_IDENTIFIER_LENGTH) //if the identifier becomes too long | |
{ | |
outputFile = fopen("parseroutput.txt","w"); //this is considered a "parser" error even though we've been putting it in scanner | |
error(27); | |
} | |
else //we've reached something that isn't alphanumeric | |
{ | |
ungetc(temp,inputFile); | |
tempIdentifier[i] = 0; //add null | |
type = isReservedWord(tempIdentifier); | |
if (strcmp(tempIdentifier,"null") == 0) | |
{ | |
t.id = nulsym; | |
} | |
else if (type == nulsym) //it's an identifier | |
{ | |
t.id = identsym; | |
strcpy(t.name,tempIdentifier); | |
} | |
else | |
{ | |
t.id = type; //it's a reserved word | |
strcpy(t.name,tempIdentifier); | |
} | |
} | |
} | |
else if (isdigit(temp)) | |
{ | |
char value[MAX_NUMBER_LENGTH+1]; //temporary identifier | |
do | |
{ | |
value[i] = temp; | |
i++; | |
temp = fgetc(inputFile); | |
}while (isdigit(temp) && temp != EOF); | |
if (i > MAX_NUMBER_LENGTH) | |
{ | |
outputFile = fopen("parseroutput.txt","w"); //this is considered a "parser" error even though we've been putting it in scanner | |
error(25); //so i made this ad-hoc fix. | |
} | |
else | |
{ | |
ungetc(temp,inputFile); | |
value[i] = 0; //add null char | |
t.id = numbersym; | |
strcpy(t.name,value); | |
} | |
} | |
else if ( (type = isSpecialSymbol(temp)) != nulsym) //assign type to the token_type of temp, make sure it's a token | |
{ | |
switch (type) //int ssym[] = {plussym,minussym,multsym,slashsym,lparentsym,rparentsym,identsym,commasym,periodsym,lessym,gtrsym,semicolonsym,colonsym}; | |
{ | |
case (slashsym): | |
{ | |
temp = fgetc(inputFile); | |
if (isSpecialSymbol(temp) == multsym) //This is a comment | |
{ | |
do | |
{ | |
temp = fgetc(inputFile); | |
}while(isSpecialSymbol(temp) != multsym); | |
temp = fgetc(inputFile); | |
t.id = nulsym; //we disregard this token because it's a comment | |
if (isSpecialSymbol(temp) != slashsym) | |
{ | |
outputFile = fopen("parseroutput.txt","w"); //this is considered a "parser" error even though we've been putting it in scanner | |
error(28); | |
} | |
} | |
else //this is a slash symbol | |
{ | |
strcpy(t.name,"/"); | |
t.id = slashsym; | |
ungetc(temp,inputFile); | |
} | |
break; | |
} | |
case (colonsym): | |
{ | |
temp = fgetc(inputFile); | |
if (isSpecialSymbol(temp) == eqsym) //if it is a becoming token. | |
{ | |
t.id = becomessym; | |
strcpy(t.name,":="); | |
} | |
else | |
{ | |
ungetc(temp,inputFile); | |
error(1); | |
} | |
break; | |
} | |
case (lessym): | |
{ | |
temp = fgetc(inputFile); | |
if (isSpecialSymbol(temp) == eqsym) // <= token | |
{ | |
t.id = leqsym; | |
strcpy(t.name,"<="); | |
} | |
else | |
{ | |
t.id = lessym; | |
strcpy(t.name,"<"); | |
ungetc(temp,inputFile); | |
} | |
break; | |
} | |
case (gtrsym): | |
{ | |
temp = fgetc(inputFile); | |
if (isSpecialSymbol(temp) == eqsym) //>= token | |
{ | |
t.id = geqsym; | |
strcpy(t.name,">="); | |
} | |
else // > token | |
{ | |
t.id = gtrsym; | |
strcpy(t.name,">"); | |
ungetc(temp,inputFile); | |
} | |
break; | |
} | |
case (exclamsym): | |
{ | |
temp = fgetc(inputFile); | |
if (isSpecialSymbol(temp) == eqsym) //!= token | |
{ | |
t.id = neqsym; | |
strcpy(t.name,"!="); | |
} | |
else // ! token | |
{ | |
outputFile = fopen("parseroutput.txt","w"); //this is considered a "parser" error even though we've been putting it in scanner | |
error(29); | |
ungetc(temp,inputFile); | |
} | |
break; | |
} | |
default: | |
{ | |
int j; | |
t.id = type; | |
for (j = 0; j < NUM_SPECIAL_SYMBOLS; j++) | |
{ | |
if (ssym[j] == type) | |
{ | |
t.name[0] = specialSymbols[j]; //set the name equal to the | |
t.name[1] = 0; //nulsym | |
} | |
} | |
} | |
} | |
} | |
else if (temp == 32 || temp == 10 || temp == 13 || temp == 9) //it is a whitespace, ignore it. | |
{ | |
t.id = nulsym; //whitespace | |
} | |
else if (temp != 32 && temp != 10 && temp != 13 && temp != 9) //if it's not a white space character | |
{ | |
fprintf(outputFile,"invalid symbol.[%d]",temp); | |
} | |
if (t.id != nulsym) //if id is nulsym, it's a comment and therefore should not be added to a token list. Otherwise, add it to the list. | |
{ | |
tokenList[crntTokenListIndex] = t; | |
t.index = crntTokenListIndex; | |
crntTokenListIndex++; | |
} | |
} | |
tokenListSize = crntTokenListIndex; | |
fclose(inputFile); | |
fclose(outputFile); | |
return; | |
} | |
/* | |
* Scanner helper function. | |
* returns nulsym if the identifier is not found, the identifier otherwise | |
**/ | |
token_type isReservedWord(char* tempIdentifier) | |
{ | |
int i; | |
for (i = 0; i < NUM_RESERVED_WORDS; i++) //check if its a reserved word | |
{ | |
if (strcmp(tempIdentifier,reservedWords[i]) == 0) | |
{ | |
return wsym[i]; //return token_type | |
} | |
} | |
return nulsym; | |
} | |
/* | |
* Scanner helper function | |
* returns nulsym if the character is not a special symbol, returns the token_type if it is a special symbol | |
**/ | |
token_type isSpecialSymbol(char temp) | |
{ | |
int i; | |
for (i = 0; i < NUM_SPECIAL_SYMBOLS; i++) | |
{ | |
if (temp == specialSymbols[i]) | |
{ | |
return ssym[i]; | |
} | |
} | |
return nulsym; | |
} | |
/* | |
Recursive Descent Parser functions | |
*/ | |
/* | |
Parser helper function. | |
prints entire symbol table | |
*/ | |
void printSymbolTable() | |
{ | |
int i; | |
for (i = 0; i < symbolTableSize; i++) | |
{ | |
symbol s = symbol_table[i]; | |
if (s.kind == 1) | |
{ | |
printf("constant %s, value %s\n", s.name, s.val); | |
} | |
else if (s.kind == 2) | |
{ | |
printf("variable %s, L level %d, M addr %d\n", s.name, s.level, s.addr); | |
} | |
else if (s.kind == 3) | |
{ | |
printf("procedure %s, L level %d, M addr %d\n", s.name, s.level, s.addr); | |
} | |
} | |
} | |
/* | |
* parser helper function. | |
* inserts crnt symbol into the table. Updates table if the symbol is already there. | |
*/ | |
void insertSymbol() | |
{ | |
int i; | |
for (i = 0; i < symbolTableSize; i++) | |
{ | |
if (strcmp(symbol_table[i].name,crntSymbol.name) == 0 && symbol_table[i].level == crntSymbol.level) //if they have the same name AND level, error | |
{ | |
error(34); | |
} | |
} | |
if (symbolTableSize < MAX_TABLE_SIZE) //the non-duplicate case. | |
{ | |
symbol_table[symbolTableSize] = crntSymbol; | |
symbolTableSize++; | |
return; | |
} | |
else | |
{ | |
fprintf(outputFile,"Error: max symbolTableSize exceeded."); | |
return; | |
} | |
} | |
/* | |
* returns the symbol from the symbol table with the specified name and level. | |
* if there is no symbol of that name at that level, we keep searching deeper until we find one. | |
*/ | |
symbol findSymbol(char* name, int level) | |
{ | |
int i, lvl; | |
for (lvl = level; lvl >= 0; lvl--) | |
{ | |
for (i = 0; i < symbolTableSize; i++) | |
{ | |
if (strcmp(symbol_table[i].name,name) == 0 && symbol_table[i].level == lvl) | |
{ | |
return symbol_table[i]; | |
} | |
} | |
} | |
} | |
/* | |
* counts the variables on the current level in the symbol table | |
*/ | |
int numVariables(int crntLevel) | |
{ | |
int i, counter = 0; | |
for (i = 0; i < symbolTableSize; i++) | |
{ | |
if (symbol_table[i].level == crntLevel && symbol_table[i].kind == 2) | |
{ | |
counter++; | |
} | |
} | |
return counter; | |
} | |
/* | |
* Parser helper function | |
* gets the next token from the input and sets it to the "crntToken" variable. | |
* Also gets name if its an identifier and gets the value if its a number and sets it to the current symbol. | |
*/ | |
void gettok() | |
{ | |
if (crntTokenListIndex < tokenListSize) //if we haven't gone through all of the tokens | |
{ | |
crntToken = (token_type)tokenList[crntTokenListIndex].id; | |
if (crntToken == identsym) //this is an identifier, so we need to get the name | |
{ | |
strcpy(crntSymbol.name,tokenList[crntTokenListIndex].name); | |
} | |
if (crntToken == numbersym) //Since this is a number token, we're going to update the current symbol with it | |
{ | |
strcpy(crntSymbol.val,tokenList[crntTokenListIndex].name); | |
} | |
crntTokenListIndex++; | |
} | |
} | |
/* | |
* parser helper function | |
* returns 1 if the current symbol equals the symbol we're looking or | |
*/ | |
int accept(token_type s) { | |
if (crntToken == s) { | |
return 1; | |
} | |
return 0; | |
} | |
/* | |
* | |
* error: pass in the error number and it prints out the error and quits parsing | |
*/ | |
void error(int num) | |
{ | |
printf("parsing failed! error number %d. See parseroutput.txt for details.\n",num); | |
switch (num) | |
{ | |
case 1: {fprintf(outputFile, "syntax error: Use = instead of :=");break;} //implemented | |
case 2: {fprintf(outputFile, "syntax error: = must be followed by a number");break;} //implemented | |
case 3: {fprintf(outputFile, "syntax error: Identifier must be followed by =");break;} //implemented | |
case 4: {fprintf(outputFile, "syntax error: const, var, procedure must be followed by identifier");break;} //implemented | |
case 5: {fprintf(outputFile, "syntax error: semicolon or comma missing");break;} //implemented | |
case 6: {fprintf(outputFile, "syntax error: incorrect symbol after procedure declaration");break;} //KIND OF implemented? | |
case 7: {fprintf(outputFile, "syntax error: statement expected.");break;} | |
case 8: {fprintf(outputFile, "syntax error: incorrect symbol after statement part in block.");break;} | |
case 9: {fprintf(outputFile, "syntax error: period expected");break;} //IMPLEMENTED | |
case 10: {fprintf(outputFile, "syntax error: semicolon between statements missing");break;} | |
case 11: {fprintf(outputFile, "syntax error: undeclared identifier");break;} | |
case 12: {fprintf(outputFile, "syntax error: assignment to constant or procedure not allowed");break;} | |
case 13: {fprintf(outputFile, "syntax error: assignment operator expected");break;} | |
case 14: {fprintf(outputFile, "syntax error: call must be followed by an identifier");break;} //implemented | |
case 15: {fprintf(outputFile, "syntax error: call of a constant or variable is meaningless");break;} | |
case 16: {fprintf(outputFile, "syntax error: then expected");break;} //implemented | |
case 17: {fprintf(outputFile, "syntax error: semicolon or } expected");break;} //semicolon one implemented | |
case 18: {fprintf(outputFile, "syntax error: do expected");break;} //implemented | |
case 19: {fprintf(outputFile, "syntax error: incorrect symbol following statement");break;} | |
case 20: {fprintf(outputFile, "syntax error: relational operator expected");break;} //implemented | |
case 21: {fprintf(outputFile, "syntax error: expression must not contain a procedure identifier");break;} //implemented | |
case 22: {fprintf(outputFile, "syntax error: right parentheses missing.");break;} //implemented | |
case 23: {fprintf(outputFile, "syntax error: the preceding factor cannot begin with this symbol.");break;} //implemented | |
case 24: {fprintf(outputFile, "syntax error: an expression cannot begin with this symbol");break;} //implemented | |
case 25: {fprintf(outputFile, "syntax error: the number is too large.");break;} //implemented | |
case 27: {fprintf(outputFile,"syntax error: maximum identifier length exceeded.");break;} //implemented | |
case 28: {fprintf(outputFile,"syntax error: comment not terminated properly!");break;} //implemented | |
case 29: {fprintf(outputFile,"syntax error: '!' not followed by '='");break;} //implemented | |
case 30: {fprintf(outputFile,"syntax error: begin not followed by end");break;} //implemented | |
case 31: {fprintf(outputFile,"syntax error: if without then");break;} //implemented | |
case 32: {fprintf(outputFile,"syntax error: comma expected");break;} //implemented | |
case 33: {fprintf(outputFile,"syntax error: can not assign a value to a const");break;} //implemented | |
case 34: {fprintf(outputFile,"syntax error: multiple declarations of the same variable");break;} //implemented | |
} | |
// | |
exit(0); | |
} | |
/* | |
* this is the function we call that does the parser. | |
*/ | |
void parse() | |
{ | |
outputFile = fopen("parseroutput.txt","w"); | |
crntTokenListIndex = 0; | |
program(); | |
fprintf(outputFile,"No errors, program is syntactically correct.\n"); | |
fclose(outputFile); | |
} | |
void program() | |
{ | |
codeGen(JMP, 0, 0); //create a JMP variable, which we will later set to the begin statement of the main program. | |
gettok(); | |
block(); | |
if (!accept(periodsym)) | |
{ | |
error(9); | |
} | |
codeGen(OPR,0,0); //always add a RTN at the end | |
} | |
void block() | |
{ | |
if (accept(constsym)) //insert a const into the table. No code need be generated - we just replace the name of references | |
{ //with the value of the const, which we find from the symbol table. | |
do{ //const loop | |
gettok(); | |
crntSymbol.kind = 1; //since its a var, set its type to 2 | |
crntSymbol.level = 0; //constants have no level | |
crntSymbol.addr = 0; //constants have no addr | |
if (!accept(identsym)) | |
{ | |
error(4); | |
} | |
gettok(); | |
if (!accept(eqsym)) | |
{ | |
error(3); | |
} | |
gettok(); | |
if (!accept(numbersym)) | |
{ | |
error(2); | |
} | |
insertSymbol(); //insert constant into symbol table | |
gettok(); | |
}while (accept(commasym)); | |
if (accept(identsym)) //identifier without a comma. comma expected. | |
{ | |
error(32); | |
} | |
if (!accept(semicolonsym)) | |
{ | |
error(17); | |
} | |
gettok(); | |
} | |
if (accept(varsym)) //insert a var into the table | |
{ | |
do{ | |
gettok(); | |
crntSymbol.kind = 2; //since its a var, set its type to 2 | |
crntSymbol.level = crntLevel; //this doesnt need to be changed | |
crntSymbol.addr = tempMaddr + 3; //adds 3 to the address since the first 3 addresses are links | |
tempMaddr++; | |
if (!accept(identsym)) | |
{ | |
error(4); | |
} | |
insertSymbol(); //insert var into symbol table | |
gettok(); | |
}while(accept(commasym)); | |
if (accept(identsym)) //identifier without a comma. comma expected. | |
{ | |
error(32); | |
} | |
if (!accept(semicolonsym)) | |
{ | |
error(17); | |
} | |
gettok(); | |
} | |
while(accept(procsym)) //insert a procedure into the table | |
{ | |
crntLevel++; | |
crntSymbol.kind = 3; //since its a proc, set its type to 3 | |
crntSymbol.level = crntLevel; //this doesnt need to be changed | |
crntSymbol.addr = codeBaseSize; //we set the address to codebase size, as a kind of hack. this way we can look it up when we use "call" | |
tempMaddr = 0; //currently 0, we may need to change it? | |
gettok(); | |
if (!accept(identsym)) | |
{ | |
error(4); | |
} | |
insertSymbol(); //insert procedure into symbol table | |
gettok(); | |
if (!accept(semicolonsym)) | |
{ | |
error(6); | |
} | |
gettok(); | |
block(); | |
codeGen(OPR,0,0); //always add a RTN at the end | |
if (!accept(semicolonsym)) | |
{ | |
error(6); | |
} | |
gettok(); | |
crntLevel--; | |
} | |
//int tempAddr = codeBaseSize; | |
if (mainFlag == 0 && crntLevel == 0) | |
{ | |
changeMvalue(0,codeBaseSize); //at this point, we've declared everything, so the next line has to be the main line. | |
printf("%d",codeBaseSize); | |
mainFlag = 1; //we know the JMP will always be at 0 on the codebase. | |
} | |
codeGen(INC, 0, 3 + numVariables(crntLevel)); //create an increment variable | |
//changeMvalue(tempAddr,3 + numVariables(crntLevel)); //increment size | |
statement(); | |
} | |
void statement() | |
{ | |
if (accept(identsym)) //PARSER: some variable becomes an expression | |
{ | |
//CODEGEN: find the symbol of the current identifier. | |
//we pass in the crnt level and we try to get the variable on the current level, | |
//but if there isn't one there, we look at larger and larger scope | |
symbol s = findSymbol(tokenList[crntTokenListIndex - 1].name,crntLevel); | |
if (s.kind == 1) //assigning a value to a const! | |
{ | |
error(33); | |
} | |
gettok(); | |
if (!accept(becomessym)) | |
{ | |
error(1); | |
} | |
gettok(); | |
expression(); | |
codeGen(STO,crntLevel - s.level,s.addr); | |
//CODEGEN: store the result of the expression in the current token's address | |
//if the current token is say, a global variable (level 0), the L value of the | |
//generated code will be crnt level - 0. | |
//For example if crntlevel = 1, then the L value is 1 - 0, which is how many levels | |
//we must go down when storing. | |
} | |
else if (accept(callsym)) //call | |
{ | |
gettok(); //identifier | |
if (!accept(identsym)) | |
{ | |
error(14); | |
} | |
symbol s = findSymbol(tokenList[crntTokenListIndex - 1].name, crntLevel + 1); | |
//we find a symbol at the level we're at + 1, because we can call functions at | |
//higher levels. This will break down if we use multiple nested functions so we'll | |
//have to do a findprocedure function which finds the procedure from the symbol table. | |
if (s.kind == 1 || s.kind == 2) //we must only call procedures, these two types are var and const | |
{ | |
error(15); | |
} | |
codeGen(CAL, 0, s.addr); //call a certain function by jumping to it in the code | |
printf("%d",s.addr); | |
gettok(); | |
} | |
else if (accept(beginsym)) //begins a code block | |
{ | |
gettok(); | |
statement(); | |
while (accept(semicolonsym)) | |
{ //get statements | |
gettok(); | |
statement(); | |
} | |
if (!accept(endsym)) | |
{ | |
error(30); //begin not followed by end | |
} | |
gettok(); | |
} | |
else if (accept(ifsym)) //if statement | |
{ | |
gettok(); | |
condition(); | |
if (!accept(thensym)) | |
{ | |
error(16); | |
} | |
gettok(); | |
int tempAddress = codeBaseSize; | |
//CODEGEN: generate JPC code. | |
codeGen(JPC,0,0); | |
codeGen(INC,0,-1); //decrease the top of the stack. | |
statement(); | |
changeMvalue(tempAddress,codeBaseSize); //change the jumpconditional to whatever the current address is. | |
} | |
else if (accept(whilesym)) //while statement | |
{ | |
gettok(); | |
int conditionAddress = codeBaseSize; //get the address of the condition | |
condition(); | |
if (!accept(dosym)) | |
{ | |
error(18); | |
} | |
//CODEGEN: generate JPC code. | |
int tempAddress = codeBaseSize; //get the address of the conditional jump | |
codeGen(JPC,0,0); | |
codeGen(INC,0,-1); //decrease the top of the stack. | |
gettok(); | |
statement(); | |
codeGen(JMP,0,conditionAddress); //create a JMP to JMP back to the condition. | |
changeMvalue(tempAddress,codeBaseSize); //change the jumpconditional to whatever the current address is so we can check the condition again and loop | |
} | |
} | |
void condition() | |
{ | |
if (accept(oddsym)) | |
{ | |
gettok(); | |
expression(); | |
//CODEGEN: odd operation | |
codeGen(OPR,0,6); | |
} | |
else | |
{ | |
expression(); //this is the first operand | |
token_type tempToken = crntToken; //CODEGEN: save this so we can gen the code for it (this is the relation) | |
if (!accept(eqsym) && !accept(neqsym) && !accept(lessym) && !accept(leqsym) && !accept(gtrsym) && !accept(geqsym)) | |
{ | |
error(20); //not a relation | |
} | |
gettok(); | |
expression(); //second operand | |
switch (tempToken) //CODEGEN: generate code for conditional | |
{ | |
case (eqsym): | |
codeGen(OPR,0,8); | |
break; | |
case (neqsym): | |
codeGen(OPR,0,9); | |
break; | |
case (lessym): | |
codeGen(OPR,0,10); | |
break; | |
case (leqsym): | |
codeGen(OPR,0,11); | |
break; | |
case (gtrsym): | |
codeGen(OPR,0,12); | |
break; | |
case (geqsym): | |
codeGen(OPR,0,13); | |
break; | |
default: error(20); break; | |
} | |
} | |
} | |
void expression() | |
{ | |
if (accept(plussym) || accept(minussym)) | |
{ | |
gettok(); | |
if (accept(minussym)) | |
{ | |
codeGen(OPR,0,1); //CODEGEN: make the current top of the stack negative | |
} | |
} | |
if (accept(slashsym) || accept(multsym)) //expressions cannot begin with these | |
{ | |
error(24); | |
} | |
term(); | |
while (accept(plussym) || accept(minussym)) | |
{ | |
if (accept(plussym)) | |
{ | |
gettok(); | |
if (accept(procsym)) | |
{ | |
error(21); | |
} | |
term(); | |
codeGen(OPR,0,2); //CODEGEN: addition | |
} | |
else if (accept(minussym)) | |
{ | |
gettok(); | |
if (accept(procsym)) | |
{ | |
error(21); | |
} | |
term(); | |
codeGen(OPR,0,3); //CODEGEN: subtraction | |
} | |
} | |
} | |
void term() | |
{ | |
factor(); | |
while (accept(multsym) || accept(slashsym)) | |
{ | |
if (accept(multsym)) | |
{ | |
gettok(); | |
factor(); | |
codeGen(OPR,0,4); //CODEGEN: multiplication | |
} | |
else if (accept(slashsym)) | |
{ | |
gettok(); | |
factor(); | |
codeGen(OPR,0,5); //CODEGEN: division | |
} | |
} | |
} | |
//can be an identifier, a number, or an expression | |
void factor() | |
{ | |
if (accept(identsym)) | |
{ | |
char* tempIdentName = tokenList[crntTokenListIndex-1].name; | |
symbol s = findSymbol(tempIdentName,crntLevel); | |
if (s.kind == 1) //CODEGEN: constant | |
{ | |
codeGen(LIT,0,atoi(s.val)); | |
} | |
else if (s.kind == 2) //CODEGEN: var | |
{ | |
codeGen(LOD,crntLevel - s.level,s.addr); //CODEGEN: load the token given by the current identifier | |
} | |
gettok(); | |
} | |
else if(accept(numbersym)) | |
{ | |
codeGen(LIT,0,atoi(tokenList[crntTokenListIndex - 1].name)); //CODEGEN: load the number associated with this token as a literal | |
//we have to decrement the index because getTok already incremented it. | |
gettok(); | |
} | |
else if (accept(lparentsym)) //expression inside parentheses | |
{ | |
gettok(); | |
expression(); | |
if (!accept(rparentsym)) | |
{ | |
error(22); //no rparen error | |
} | |
gettok(); | |
} | |
else | |
{ | |
error(23); | |
} | |
} | |
/** | |
generates assembly code for a specified operation, M, and L value. Adds it to instruction list. | |
*/ | |
void codeGen(operation opr, int L, int M) | |
{ | |
instruction temp; | |
temp.OP = opr; | |
temp.L = L; | |
temp.M = M; | |
codeBase[codeBaseSize] = temp; //the code | |
codeBaseSize++; | |
} | |
/* | |
change M value of code at address "address" to "M" | |
*/ | |
void changeMvalue(int address, int M) | |
{ | |
codeBase[address].M = M; | |
} | |
void printCodeBase() | |
{ | |
outputFile = fopen("codegenoutput.txt","w"); | |
int i; | |
for (i = 0; i < codeBaseSize; i++) | |
{ | |
fprintf(outputFile,"%d %d %d\n",codeBase[i].OP,codeBase[i].L, codeBase[i].M); | |
printf("%d %d %d\n",codeBase[i].OP,codeBase[i].L, codeBase[i].M); | |
} | |
} | |
/* | |
P-Code translation machine | |
By: Stephen Bryant | |
Thursday, September 02, 2010. | |
*/ | |
void pmachine(void) | |
{ | |
//open files | |
inputFile = fopen("codegenoutput.txt","r"); | |
outputFile = fopen("pmachineoutput.txt","w"); | |
/*Init stack*/ | |
stack[1] = 0; | |
stack[2] = 0; | |
stack[3] = 0; | |
//temp struct | |
instruction temp; | |
codeBaseSize = 0; | |
//print header | |
fprintf(outputFile,"%s\t%s\t%s\t%s\n","Line","OP","L","M"); | |
while (fscanf(inputFile,"%d %d %d",((int)(codeBase[codeBaseSize].OP)),&(codeBase[codeBaseSize].L),&(codeBase[codeBaseSize].M)) != EOF) //read input.txt into codebase | |
{ | |
printCodeLine(codeBaseSize); //print current line being read | |
fprintf(outputFile,"\n"); | |
codeBaseSize++; | |
} | |
//header for execution | |
fprintf(outputFile,"\n\n\n"); | |
fprintf(outputFile,"\t\t\t\t\tPC\tBP\tSP\tStack\n"); | |
fprintf(outputFile,"Initial Values\t\t\t\t%d\t%d\t%d\t",PC,BP,SP); | |
printStack(BP,SP); | |
fprintf(outputFile,"\n"); | |
//loop fetch/execute | |
int flag = 0; | |
do | |
{ | |
printCodeLine(PC); | |
fetch(); | |
//we're done executing after this loop | |
if (IR->OP == 2 && IR->M == 0 && (BP == 1)) | |
{ | |
flag = 1; | |
} | |
execute(); | |
fprintf(outputFile,"\t%d\t%d\t%d\t",PC,BP,SP); | |
printStack(BP,SP); | |
fprintf(outputFile,"\n"); | |
} while(flag == 0); //if this were true, it would mean the program is returning and there is nothing left on the stack | |
fclose(inputFile); | |
fclose(outputFile); | |
} | |
void printCodeLine(int i) | |
{ | |
fprintf(outputFile,"%d\t", i); //print line number | |
switch (codeBase[i].OP) | |
{ | |
case 1: //LIT: push literal "M" to top of the stack | |
fprintf(outputFile,"lit\t"); | |
break; | |
case 2: //OPR: perform operation | |
fprintf(outputFile,"opr\t"); | |
break; | |
case 3: //LOD: load item at offset M, L levels down to the top of the stack. | |
fprintf(outputFile,"lod\t"); | |
break; | |
case 4: //STO: store item at offset M, L levels down from the top of the stack. | |
fprintf(outputFile,"sto\t"); | |
break; | |
case 5: //CAL: create a new frame. | |
fprintf(outputFile,"cal\t"); | |
break; | |
case 6: //INC: allocate M locals (first 3 are SL, DL, RA) | |
fprintf(outputFile,"inc\t"); | |
break; | |
case 7: //JMP: jump to line M of the code base | |
fprintf(outputFile,"jmp\t"); | |
break; | |
case 8: //JPC: jump to line M of the code base if the item on top of the stack is 0 | |
fprintf(outputFile,"jpc\t"); | |
break; | |
case 9: //SOI: print the top of the stack | |
fprintf(outputFile,"soi\t"); | |
break; | |
case 10: //SIO: store input to the stack at point SP. | |
fprintf(outputFile,"sio\t"); | |
break; | |
default: | |
break; | |
} | |
fprintf(outputFile,"%d\t%d\t",codeBase[i].L,codeBase[i].M); | |
} | |
//print entire populated stack. | |
void printStack(int tempBP, int tempSP) | |
{ | |
int i; | |
if (tempBP == 0) | |
{ | |
return; | |
} | |
else | |
{ | |
printStack(stack[tempBP + 1],tempBP-1); | |
if (tempBP != 1) | |
{ | |
fprintf(outputFile,"| "); | |
} | |
if (tempBP == tempSP + 1) //this would be if someone used "CAL" recently, the stack pointer would not have incremented and the new AR would need to show up. | |
{ | |
for (i = tempBP; i < tempBP + 3; i++) | |
{ | |
fprintf(outputFile,"%d ",stack[i]); | |
} | |
} | |
else //this is a normal AR without a recent "CAL" | |
{ | |
for (i = tempBP; i <= tempSP; i++) | |
{ | |
fprintf(outputFile,"%d ",stack[i]); | |
} | |
} | |
} | |
} | |
//fetch cycle, retrieves code from codebase to IR | |
void fetch() | |
{ | |
IR = &codeBase[PC]; | |
PC++; | |
} | |
//execute cycle, executes code in IR | |
void execute() | |
{ | |
int op = IR->OP; | |
switch (op) | |
{ | |
case 1: //LIT: push literal "M" to top of the stack | |
SP++; | |
stack[SP] = IR->M; | |
break; | |
case 2: //OPR: perform operation | |
switch(IR->M) | |
{ | |
case 0: //RTN: return operation | |
SP = BP - 1; //top of the stack is the base pointer of the current frame - 1 (since the frame is deleted) | |
PC = stack[SP + 3]; //new program counter is the return address from the frame | |
BP = stack[SP + 2]; //new base pointer is the dynamic link from the current frame | |
break; | |
case 1: //NEG: negative of the top of the stack | |
stack[SP] = - stack[SP]; | |
break; | |
case 2: //ADD: add top two fields, store result on the top of the stack | |
SP--; | |
stack[SP] = stack[SP] + stack[SP+1]; | |
break; | |
case 3: //SUB: subtract top field from the second field, store result on top of the stack | |
SP--; | |
stack[SP] = stack[SP] - stack[SP+1]; | |
break; | |
case 4: //MUL: multiply top two items, store result on the top of the stack | |
SP--; | |
stack[SP] = stack[SP] * stack[SP+1]; | |
break; | |
case 5: //DIV: divide second field by top field, store result | |
SP--; | |
stack[SP] = stack[SP] / stack[SP+1]; | |
break; | |
case 6: //ODD: returns true if the top field is odd | |
stack[SP] = stack[SP] % 2; | |
break; | |
case 7: //MOD: mod the field one level down by the field on top of the stack, store on top of the stack. | |
SP--; | |
stack[SP] = stack[SP] % stack[SP+1]; | |
break; | |
case 8: //EQL: whether the top two fields are equal, adds a boolean value to the top of the stack. | |
SP--; | |
stack[SP] = (stack[SP] == stack[SP+1]); | |
break; | |
case 9: //NEQ: when the top two fields are not equal, adds a boolean value true to the top of the stack, or false. | |
SP--; | |
stack[SP] = !(stack[SP] == stack[SP+1]); | |
case 10: //LSS: adds to the top of the stack true if the first argument is less than the second argument. | |
SP--; | |
stack[SP] = stack[SP] < stack[SP+1]; | |
break; | |
case 11: //LEQ: adds to the top of the stack true if the first argument is less than or equal to the second argument. | |
SP--; | |
stack[SP] = stack[SP] <= stack[SP+1]; | |
break; | |
case 12: //GTR: adds to the top of the stack true if the first argument is greater than the second argument. | |
SP--; | |
stack[SP] = stack[SP] > stack[SP+1]; | |
break; | |
case 13: //GEQ: adds to the top of the stack "true" if the first argument is greater than or equal to the second argument. | |
SP--; | |
stack[SP] = stack[SP] >= stack[SP+1]; | |
break; | |
default: | |
exit(0); //error | |
break; | |
} | |
break; | |
case 3: //LOD: load item at offset M, L levels down to the top of the stack. | |
SP++; | |
stack[SP] = stack[base(IR->L,BP) + IR->M]; | |
break; | |
case 4: //STO: store item at offset M, L levels down from the top of the stack. | |
stack[base(IR->L,BP) + IR->M] = stack[SP]; | |
SP--; | |
break; | |
case 5: //CAL: create a new frame. | |
stack[SP + 1] = base(IR->L,BP); //static link | |
stack[SP + 2] = BP; //dynamic link: points to previous stack frame | |
stack[SP + 3] = PC; //Address in code of next code segment | |
BP = SP + 1; //base pointer to the bottom of the new frame | |
PC = IR->M; | |
break; | |
case 6: //INC: allocate M locals (first 3 are SL, DL, RA) | |
SP += IR->M; | |
break; | |
case 7: //JMP: jump to line M of the code base | |
PC = IR->M; | |
break; | |
case 8: //JPC: jump to line M of the code base if the item on top of the stack is 0 | |
if (stack[SP] == 0) | |
{ | |
PC = IR->M; | |
SP--; | |
} | |
break; | |
case 9: //SOI: print the top of the stack | |
fprintf(outputFile,"\nTop Of the stack:%d",stack[SP]); | |
SP--; | |
break; | |
case 10: //SIO: store input to the stack at point SP. | |
fscanf(inputFile,"%d ",&stack[SP]); | |
SP++; | |
break; | |
default: | |
break; | |
} | |
} | |
/**********************************************/ | |
/* Find base L levels down */ | |
/* */ | |
/**********************************************/ | |
int base(int l,int base) // l stand for L in the instruction format | |
{ | |
int b1; //find base L levels down | |
b1 = base; | |
while (l > 0) | |
{ | |
b1 = stack[b1]; //base pointer equals the current base pointer (which points down to the previous frame's base) | |
l--; | |
} | |
return b1; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment