rondorkerin · July 18, 2014 17:31
diff --git a/gistfile1.c b/gistfile1.c
 /*
 PL0 Compiler
 By:	Stephen Bryant
 
 Last update:
 October 24, 2010
 */ 

 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

 #define MAX_IDENTIFIER_LENGTH 11
 #define MAX_NUMBER_LENGTH 5
 #define NUM_RESERVED_WORDS 15
 #define NUM_SPECIAL_SYMBOLS 14
 #define MAX_TABLE_SIZE 256
 #define MAX_LEXEME_SIZE 1000
 #define MAX_STACK_HEIGHT 2000
 #define MAX_LEXI_LEVELS 3
 #define NUMBER_TOKEN_TYPES 36
 #define MAX_CODE_LENGTH 500

 typedef enum { //p-machine operation enumerations
 	LIT = 1, OPR = 2, LOD = 3, STO = 4, CAL = 5, INC = 6, 
 	JMP = 7, JPC = 8, WRT = 9, SIO = 10} operation;

 typedef enum { //tokens used in scanner
 	nul = 1,nulsym, identsym, numbersym, plussym, minussym,
 	multsym, slashsym, oddsym, eqsym, neqsym, lessym, leqsym,
 	gtrsym, geqsym, lparentsym, rparentsym, commasym, semicolonsym,
 	periodsym, becomessym, beginsym, endsym, ifsym, thensym,
 	whilesym, dosym, callsym, constsym, varsym, procsym, outsym,
 	insym , elsesym, colonsym, exclamsym } token_type;

 //maps token names
 char* token_names[NUMBER_TOKEN_TYPES] = {"","nulsym", "identsym", "numbersym", "plussym", "minussym", //1-6
 	"multsym", "slashsym", "oddsym", "eqsym", "neqsym", "lessym", "leqsym", //7-13
 	"gtrsym", "geqsym", "lparentsym", "rparentsym", "commasym", "semicolonsym", //14-19
 	"periodsym", "becomessym", "beginsym", "endsym", "ifsym", "thensym", //20-25
 	"whilesym", "dosym", "callsym", "constsym", "varsym", "procsym", "outsym", //26-32
 	"insym" , "elsesym", "colonsym", "exclamsym"}; //33-36

 typedef struct //used in the scanner
 {
 	token_type id; //id of token
 	char name[MAX_IDENTIFIER_LENGTH]; //name of token
 	int index; //index in token array
 } token;


 typedef struct //used in the p-machine 
 {
 	operation OP; 
 	int L;
 	int M;
 	
 } instruction;

 typedef struct //used in the parser
 {
 	int kind; //const = 1, var = 2, proc = 3
 	char name[MAX_IDENTIFIER_LENGTH];
 	char val[MAX_NUMBER_LENGTH]; //number (ascii value)
 	int level; //L level
 	int addr; //M addr
 } symbol;


 /*
 Scanner stuff
 */
 //reserved word names
 char* reservedWords[] = {"null","call","begin","end","if","then","else","while","do","in","out", "odd","const","procedure", "var"};
 //internal representation of reserved words
 token_type wsym[] = {nulsym,callsym, beginsym, endsym, ifsym, thensym, elsesym, whilesym, dosym, insym, outsym, oddsym, constsym,procsym,varsym};

 //special symbol names
 char specialSymbols[] = {'+','-','*','/','(',')','=',',','.','<','>',';',':','!'};
 //internal representation of special symbols
 token_type ssym[] = {plussym,minussym,multsym,slashsym,lparentsym,rparentsym,eqsym,commasym,periodsym,lessym,gtrsym,semicolonsym,colonsym,exclamsym};

 token tokenList[MAX_TABLE_SIZE];
 int crntTokenListIndex = 0; //current index in token list
 int tokenListSize = 0;

 /*
 Parser Stuff
 */

 symbol symbol_table[MAX_TABLE_SIZE];
 symbol crntSymbol; //current symbol.
 token_type crntToken; //current token
 int crntLevel = 0; //current L level
 int symbolTableSize = 0; 
 int tempMaddr = 0; //temp M used for calculating M addressess

 /*
 Codegen stuff
 */
 int tempLvalue;
 int mainLineNumber = 0; //the line number of our main method
 int mainFlag = 0; //basically, when i set this to 1, it means we've already changed the main function's address and we don't need to do it anymore.

 /*
 P-machine stuff
 */
 int stack[MAX_STACK_HEIGHT];
 int SP = 0, BP = 1, PC = 0; //stack pointer, base pointer, program counter
 instruction* IR; //instruction register
 instruction codeBase[MAX_CODE_LENGTH]; //the code
 int codeBaseSize; //current size of the code base

 FILE* inputFile;
 FILE* outputFile;


 /*
 basic function declarations
 */
 void scan();
 void parse();
 token_type isReservedWord(char* tempIdentifier);
 token_type isSpecialSymbol(char temp);
 symbol findSymbol(char* name, int level);
 void printSymbolTable();
 void insertSymbol();
 void addConstantNumber(int temp);
 void gettok();
 int accept(token_type s);
 void error(int num);
 void program();
 void block();
 void statement();
 void condition();
 void expression();
 void term();
 void factor();
 void codeGen(operation opr, int M, int L);
 void changeMvalue(int address, int M);
 void printCodeBase();
 void printCodeLine(int i);
 void printTopOfStack();
 void printStack(int tempBP,int tempSP);
 void inputToStack();
 void fetch();
 void execute();
 void pmachine();


 /*
 Program driving function
 */
 int main()
 {
 	int i;
 	
 	printf("scanning:\n");
 	
 		
 	scan();
 	for (i = 0; i < tokenListSize; i++)
 	{
 		token x = tokenList[i];
 		printf("%d\n", x.id);
 	}

 	printf("parsing:\n");
 	//	
 	parse();

 	printf("symbol table:\n");   
 	//	
 	for (i = 0; i < symbolTableSize; i++)
 	{
 		printf("name %s kind %d level %d addr %d val %s\n",symbol_table[i].name,symbol_table[i].kind,symbol_table[i].level,symbol_table[i].addr,symbol_table[i].val);
 	}
 	printf("printing code base:\n");
 	//	
 	printCodeBase();
 	printf("Executing code in p machine!\n");
 	//	
 	pmachine();
 	printf("success!\n");
 	//
 }

 /*
 Populates tokenList with tokens from the program.
 */ 
 void scan()
 {
 	inputFile = fopen("programinput.txt","r");
 	outputFile = fopen("lexoutput.txt","w");
 	char temp;
 	int i; //counter to make sure identifiers and numbers are correct length
 	while ((temp = fgetc(inputFile)) != EOF) //Token reading loop
 	{
 		token t;
 		token_type type;
 		i = 0;
 		if (isalpha(temp))
 		{	
 			char tempIdentifier[MAX_IDENTIFIER_LENGTH];
 			do //read characters into identifier
 			{
 				tempIdentifier[i] = temp;
 				i++;
 				temp = fgetc(inputFile);
 			} while (isalnum(temp) && temp != EOF);
 			if (i > MAX_IDENTIFIER_LENGTH) //if the identifier becomes too long
 			{
                outputFile = fopen("parseroutput.txt","w"); //this is considered a "parser" error even though we've been putting it in scanner                  
 				error(27); 
 			}
 			else //we've reached something that isn't alphanumeric
 			{
 				ungetc(temp,inputFile);
 				tempIdentifier[i] = 0; //add null
 				
 				type = isReservedWord(tempIdentifier);
 				
 				if (strcmp(tempIdentifier,"null") == 0)
 				{
 					t.id = nulsym;
 				}
 				else if (type == nulsym) //it's an identifier
 				{
 					t.id = identsym; 
 					strcpy(t.name,tempIdentifier);
 				}
 				else
 				{
 					t.id = type; //it's a reserved word
 					strcpy(t.name,tempIdentifier);
 				}
 			}
 		}
 		else if (isdigit(temp))
 		{
 			char value[MAX_NUMBER_LENGTH+1]; //temporary identifier
 			do
 			{
 				value[i] = temp;
 				i++;
 				temp = fgetc(inputFile);
 				
 			}while (isdigit(temp) && temp != EOF);
 			if (i > MAX_NUMBER_LENGTH)
 			{
                outputFile = fopen("parseroutput.txt","w"); //this is considered a "parser" error even though we've been putting it in scanner
 				error(25);                                  //so i made this ad-hoc fix.
 			}
 			else
 			{
 				ungetc(temp,inputFile);
 				value[i] = 0; //add null char
 				t.id = numbersym;
 				strcpy(t.name,value);
 			}
 			
 		}
 		else if ( (type = isSpecialSymbol(temp)) != nulsym) //assign type to the token_type of temp, make sure it's a token
 		{
 			
 			switch (type) //int ssym[] = {plussym,minussym,multsym,slashsym,lparentsym,rparentsym,identsym,commasym,periodsym,lessym,gtrsym,semicolonsym,colonsym};
 			{
 				case (slashsym):
 				{
 					temp = fgetc(inputFile);
 					if (isSpecialSymbol(temp) == multsym) //This is a comment
 					{
 						do
 						{
 							temp = fgetc(inputFile);
 						}while(isSpecialSymbol(temp) != multsym);
 						temp = fgetc(inputFile);
 						t.id = nulsym; //we disregard this token because it's a comment
 						if (isSpecialSymbol(temp) != slashsym)
 						{
                            outputFile = fopen("parseroutput.txt","w"); //this is considered a "parser" error even though we've been putting it in scanner
 							error(28);
 						}
 					}
 					else //this is a slash symbol
 					{
 						strcpy(t.name,"/");
 						t.id = slashsym;
 						ungetc(temp,inputFile);
 					}
 					break;
 					
 				}
 				case (colonsym):
 				{
 					temp = fgetc(inputFile);
 					if (isSpecialSymbol(temp) == eqsym) //if it is a becoming token.
 					{
 						t.id = becomessym;
 						strcpy(t.name,":=");
 					}
 					else
 					{
 						ungetc(temp,inputFile);
 						error(1);
 					}
 					break;
 				}
 				case (lessym):
 				{
 					temp = fgetc(inputFile);
 					if (isSpecialSymbol(temp) == eqsym) // <= token
 					{
 						t.id = leqsym;
 						strcpy(t.name,"<=");
 					}
 					else
 					{
 						t.id = lessym;
 						strcpy(t.name,"<");
 						ungetc(temp,inputFile);
 					}
 					break;					
 				}
 				case (gtrsym):
 				{
 					temp = fgetc(inputFile);
 					if (isSpecialSymbol(temp) == eqsym) //>= token
 					{
 						t.id = geqsym;
 						strcpy(t.name,">=");
 						
 					}
 					else // > token
 					{
 						t.id = gtrsym;
 						strcpy(t.name,">");
 						ungetc(temp,inputFile);
 					}
 					break;
 				}
 				case (exclamsym):
 				{
 					temp = fgetc(inputFile);
 					if (isSpecialSymbol(temp) == eqsym) //!= token
 					{
 						
 						t.id = neqsym;
 						strcpy(t.name,"!=");
 					}
 					else // ! token
 					{
                        outputFile = fopen("parseroutput.txt","w"); //this is considered a "parser" error even though we've been putting it in scanner
                        error(29);
 						ungetc(temp,inputFile);
 					}
 					break;				
 				}
 				default:
 				{
 					int j;
 					t.id = type;
 					for (j = 0; j < NUM_SPECIAL_SYMBOLS; j++)
 					{
 						if (ssym[j] == type)
 						{
 							t.name[0] = specialSymbols[j]; //set the name equal to the
 							t.name[1] = 0; //nulsym
 						}
 					}
 				}
 			}
 		}
 		else if (temp == 32 || temp == 10 || temp == 13 || temp == 9) //it is a whitespace, ignore it.
 		{
 			t.id = nulsym; //whitespace
 		}
 		else if (temp != 32 && temp != 10 && temp != 13 && temp != 9) //if it's not a white space character
 		{
 			fprintf(outputFile,"invalid symbol.[%d]",temp);
 		}
 		if (t.id != nulsym) //if id is nulsym, it's a comment and therefore should not be added to a token list. Otherwise, add it to the list.
 		{
 			tokenList[crntTokenListIndex] = t;
 			t.index = crntTokenListIndex; 
 			crntTokenListIndex++;
 		}
 	}
 	
 	tokenListSize = crntTokenListIndex;
 	
 	fclose(inputFile);
 	fclose(outputFile);
 	
 	return;
 }
 /*
 *  Scanner helper function.
 *	returns nulsym if the identifier is not found, the identifier otherwise
 **/
 token_type isReservedWord(char* tempIdentifier)
 {
 	int i;
 	for (i = 0; i < NUM_RESERVED_WORDS; i++) //check if its a reserved word
 	{
 		if (strcmp(tempIdentifier,reservedWords[i]) == 0)
 		{
 			return wsym[i]; //return token_type
 		}
 	}
 	return nulsym;
 }

 /*
 *  Scanner helper function
 *	returns nulsym if the character is not a special symbol, returns the token_type if it is a special symbol	
 **/
 token_type isSpecialSymbol(char temp)
 {
 	int i;
 	for (i = 0; i < NUM_SPECIAL_SYMBOLS; i++)
 	{
 		if (temp == specialSymbols[i])
 		{
 			return ssym[i];
 		}
 	}
 	return nulsym;
 }

 /*
 Recursive Descent Parser functions
 */

 /*
 Parser helper function.
 prints entire symbol table
 */
 void printSymbolTable()
 {
 	int i;
 	for (i = 0; i < symbolTableSize; i++)
 	{
 		symbol s = symbol_table[i];
 		if (s.kind == 1)
 		{
 			printf("constant %s, value %s\n", s.name, s.val);
 		}
 		else if (s.kind == 2)
 		{
 			printf("variable %s, L level %d, M addr %d\n", s.name, s.level, s.addr);
 		}
 		else if (s.kind == 3)
 		{
 			printf("procedure %s, L level %d, M addr %d\n", s.name, s.level, s.addr);
 		}		
 	}
 }

 /*
 * parser helper function.
 * inserts crnt symbol into the table. Updates table if the symbol is already there.
 */
 void insertSymbol()
 {
 	int i;
 	for (i = 0; i < symbolTableSize; i++)
 	{
 		if (strcmp(symbol_table[i].name,crntSymbol.name) == 0 && symbol_table[i].level == crntSymbol.level) //if they have the same name AND level, error
 		{
 			error(34);
 		}
 	}
 	
 	if (symbolTableSize < MAX_TABLE_SIZE) //the non-duplicate case.
 	{
 		symbol_table[symbolTableSize] = crntSymbol;
 		symbolTableSize++;
 		return;
 	}
 	else
 	{
 		fprintf(outputFile,"Error: max symbolTableSize exceeded.");
 		return;
 	}
 }

 /*
 *	returns the symbol from the symbol table with the specified name and level.
 *      if there is no symbol of that name at that level, we keep searching deeper until we find one.
 */
 symbol findSymbol(char* name, int level)
 {
 	int i, lvl;
 	for (lvl = level; lvl >= 0; lvl--)
 	{
 		for (i = 0; i < symbolTableSize; i++)
 		{
 			if (strcmp(symbol_table[i].name,name) == 0 && symbol_table[i].level == lvl) 
 			{
 				return symbol_table[i];
 			}
 		}
 	}
 }

 /*
 *   counts the variables on the current level in the symbol table
 */
 int numVariables(int crntLevel)
 {
    int i, counter = 0;
    for (i = 0; i < symbolTableSize; i++)
    {
        if (symbol_table[i].level == crntLevel && symbol_table[i].kind == 2)
        {
 			counter++;
        }
    }
    return counter;
 }

 /*
 *   Parser helper function
 *	gets the next token from the input and sets it to the "crntToken" variable.
 *   Also gets name if its an identifier and gets the value if its a number and sets it to the current symbol.
 */
 void gettok()
 {	
 	if (crntTokenListIndex < tokenListSize) //if we haven't gone through all of the tokens
 	{
 		crntToken = (token_type)tokenList[crntTokenListIndex].id;
 		
 		if (crntToken == identsym) //this is an identifier, so we need to get the name
 		{	
 			strcpy(crntSymbol.name,tokenList[crntTokenListIndex].name);
 			
 		}
 		if (crntToken == numbersym) //Since this is a number token, we're going to update the current symbol with it
 		{
 			strcpy(crntSymbol.val,tokenList[crntTokenListIndex].name);	
 		}
 		crntTokenListIndex++;
 	}
 }

 /*
 *   parser helper function
 *   returns 1 if the current symbol equals the symbol we're looking or
 */
 int accept(token_type s) {
    if (crntToken == s) {
        return 1;
    }
    return 0;
 }

 /*
 *
 *	error: pass in the error number and it prints out the error and quits parsing
 */
 void error(int num)
 {
    printf("parsing failed! error number %d. See parseroutput.txt for details.\n",num);

 	switch (num)
 	{
 		case 1: {fprintf(outputFile, "syntax error: Use = instead of :=");break;} //implemented
 		case 2: {fprintf(outputFile, "syntax error: = must be followed by a number");break;} //implemented
 		case 3: {fprintf(outputFile, "syntax error: Identifier must be followed by =");break;} //implemented
 		case 4: {fprintf(outputFile, "syntax error: const, var, procedure must be followed by identifier");break;} //implemented
 		case 5: {fprintf(outputFile, "syntax error: semicolon or comma missing");break;} //implemented
 		case 6: {fprintf(outputFile, "syntax error: incorrect symbol after procedure declaration");break;} //KIND OF implemented? 
 		case 7: {fprintf(outputFile, "syntax error: statement expected.");break;}
 		case 8: {fprintf(outputFile, "syntax error: incorrect symbol after statement part in block.");break;}
 		case 9: {fprintf(outputFile, "syntax error: period expected");break;} //IMPLEMENTED
 		case 10: {fprintf(outputFile, "syntax error: semicolon between statements missing");break;}
 		case 11: {fprintf(outputFile, "syntax error: undeclared identifier");break;}
 		case 12: {fprintf(outputFile, "syntax error: assignment to constant or procedure not allowed");break;}
 		case 13: {fprintf(outputFile, "syntax error: assignment operator expected");break;}		
 		case 14: {fprintf(outputFile, "syntax error: call must be followed by an identifier");break;} //implemented
 		case 15: {fprintf(outputFile, "syntax error: call of a constant or variable is meaningless");break;}
 		case 16: {fprintf(outputFile, "syntax error: then expected");break;} //implemented
 		case 17: {fprintf(outputFile, "syntax error: semicolon or } expected");break;} //semicolon one implemented
 		case 18: {fprintf(outputFile, "syntax error: do expected");break;} //implemented
 		case 19: {fprintf(outputFile, "syntax error: incorrect symbol following statement");break;}
 		case 20: {fprintf(outputFile, "syntax error: relational operator expected");break;} //implemented
 		case 21: {fprintf(outputFile, "syntax error: expression must not contain a procedure identifier");break;}	//implemented
 		case 22: {fprintf(outputFile, "syntax error: right parentheses missing.");break;} //implemented
 		case 23: {fprintf(outputFile, "syntax error: the preceding factor cannot begin with this symbol.");break;} //implemented
 		case 24: {fprintf(outputFile, "syntax error: an expression cannot begin with this symbol");break;}  //implemented
 		case 25: {fprintf(outputFile, "syntax error: the number is too large.");break;} //implemented
 		case 27: {fprintf(outputFile,"syntax error: maximum identifier length exceeded.");break;} //implemented
        case 28: {fprintf(outputFile,"syntax error: comment not terminated properly!");break;} //implemented
        case 29: {fprintf(outputFile,"syntax error: '!' not followed by '='");break;} //implemented
        case 30: {fprintf(outputFile,"syntax error: begin not followed by end");break;} //implemented
        case 31: {fprintf(outputFile,"syntax error: if without then");break;} //implemented
        case 32: {fprintf(outputFile,"syntax error: comma expected");break;} //implemented        
        case 33: {fprintf(outputFile,"syntax error: can not assign a value to a const");break;} //implemented         
        case 34: {fprintf(outputFile,"syntax error: multiple declarations of the same variable");break;} //implemented                 
    }
    //
 	exit(0);
 }


 /*
 * this is the function we call that does the parser.
 */
 void parse()
 {
 	outputFile = fopen("parseroutput.txt","w");
 	crntTokenListIndex = 0;
 	program();
 	fprintf(outputFile,"No errors, program is syntactically correct.\n");
 	fclose(outputFile);
 }

 void program()
 {
   	codeGen(JMP, 0, 0); //create a JMP variable, which we will later set to the begin statement of the main program.
 	gettok();
 	block();
 	if (!accept(periodsym))
 	{       
 		error(9);
 	}
 	codeGen(OPR,0,0); //always add a RTN at the end
 }

 void block()
 {
 	if (accept(constsym)) //insert a const into the table. No code need be generated - we just replace the name of references
 	{                     //with the value of the const, which we find from the symbol table.
 		do{ //const loop
 			gettok();
 			crntSymbol.kind = 1; //since its a var, set its type to 2
 			crntSymbol.level = 0; //constants have no level
 			crntSymbol.addr = 0; //constants have no addr
 			
 			if (!accept(identsym))
 			{
 				error(4);
 			}
 			gettok();
 			if (!accept(eqsym))
 			{
                error(3);
            }
 			gettok();
 			if (!accept(numbersym))
 			{
                error(2);
            }
 			insertSymbol(); //insert constant into symbol table
 			gettok();
 		}while (accept(commasym));
 		if (accept(identsym)) //identifier without a comma. comma expected.
 		{
              error(32); 
        }
 		if (!accept(semicolonsym))
 		{
 			error(17);
 		}
 		gettok();
 	}
 	if (accept(varsym)) //insert a var into the table
 	{
 		do{
 			gettok();
 			crntSymbol.kind = 2; //since its a var, set its type to 2
 			crntSymbol.level = crntLevel; //this doesnt need to be changed
 			crntSymbol.addr = tempMaddr + 3; //adds 3 to the address since the first 3 addresses are links
 			tempMaddr++;
 			
 			if (!accept(identsym))
 			{
 				error(4);
 			}
 			insertSymbol(); //insert var into symbol table
 			gettok();
 		}while(accept(commasym));
 		if (accept(identsym)) //identifier without a comma. comma expected.
 		{
              error(32); 
        }		
 		if (!accept(semicolonsym))
 		{
 			error(17);
 		}
 		gettok();
 	}	
 	while(accept(procsym)) //insert a procedure into the table
 	{
 		crntLevel++;
 		crntSymbol.kind = 3; //since its a proc, set its type to 3
 		crntSymbol.level = crntLevel; //this doesnt need to be changed
 		crntSymbol.addr = codeBaseSize; //we set the address to codebase size, as a kind of hack. this way we can look it up when we use "call"
        	tempMaddr = 0; //currently 0, we may need to change it?
 		

 		gettok();

 		if (!accept(identsym))
 		{
 			error(4);
 		}
 		insertSymbol(); //insert procedure into symbol table
 		gettok();
 		if (!accept(semicolonsym))
 		{
 			error(6);
 		}
 		gettok();
 		block();
 		codeGen(OPR,0,0); //always add a RTN at the end
 		if (!accept(semicolonsym))
 		{
 			error(6);
 		}
 		gettok();
 		crntLevel--;
 	}	
        //int tempAddr = codeBaseSize;
 	if (mainFlag == 0 && crntLevel == 0)
 	{
 	       changeMvalue(0,codeBaseSize); //at this point, we've declared everything, so the next line has 			to be the main line.
 	       printf("%d",codeBaseSize);
 	       
 	       mainFlag = 1;                     //we know the JMP will always be at 0 on the codebase.
 	}	
   	codeGen(INC, 0, 3 + numVariables(crntLevel)); //create an increment variable
  	//changeMvalue(tempAddr,3 + numVariables(crntLevel));	//increment size	    
 	statement();
 }

 void statement()
 {
 	if (accept(identsym)) //PARSER: some variable becomes an expression
 	{
 		//CODEGEN: find the symbol of the current identifier.
 		//we pass in the crnt level and we try to get the variable on the current level,
 		//but if there isn't one there, we look at larger and larger scope
 		symbol s = findSymbol(tokenList[crntTokenListIndex - 1].name,crntLevel);
 		if (s.kind == 1) //assigning a value to a const!
 		{
 		   error(33);           
 		}
 			gettok();
 		if (!accept(becomessym))
 		{
 	    		error(1);
 		}
 		gettok();
 		expression();
 		codeGen(STO,crntLevel - s.level,s.addr); 
 		//CODEGEN: store the result of the expression in the current token's address
 		//if the current token is say, a global variable (level 0), the L value of the 
 		//generated code will be crnt level - 0. 
 		//For example if crntlevel = 1, then the L value is 1 - 0, which is how many levels
 		//we must go down when storing.		
 	}
 	else if (accept(callsym)) //call 
 	{
 		gettok(); //identifier 
 		if (!accept(identsym))
 		{
 			error(14);
 		}
 		symbol s = findSymbol(tokenList[crntTokenListIndex - 1].name, crntLevel + 1);
 		//we find a symbol at the level we're at + 1, because we can call functions at
 		//higher levels. This will break down if we use multiple nested functions so we'll
 		//have to do a findprocedure function which finds the procedure from the symbol table.
 		if (s.kind == 1 || s.kind == 2) //we must only call procedures, these two types are var and const
 		{
 			error(15);
 		}
        
    		codeGen(CAL, 0, s.addr); //call a certain function by jumping to it in the code
    		printf("%d",s.addr);
    	
 		gettok();   
 	}
 	else if (accept(beginsym)) //begins a code block
 	{
 	    	gettok();
 		statement();  
 		while (accept(semicolonsym))
 		{ //get statements
 			gettok();
 			statement();
 		}
 		if (!accept(endsym))
 		{
 		    error(30); //begin not followed by end           
 		}		
 		gettok();
 	}
 	else if (accept(ifsym)) //if statement
 	{
 		gettok();
 		condition();
 		if (!accept(thensym))
 		{
 			error(16);
 		}
 		gettok();
 		int tempAddress = codeBaseSize;
        //CODEGEN: generate JPC code.
        codeGen(JPC,0,0); 
        codeGen(INC,0,-1); //decrease the top of the stack.        
 	statement();
        changeMvalue(tempAddress,codeBaseSize); //change the jumpconditional to whatever the current address is.
 	}
 	else if (accept(whilesym)) //while statement
 	{
 		gettok();
 		int conditionAddress = codeBaseSize; //get the address of the condition
 		condition();
 		if (!accept(dosym))
 		{
 			error(18);
 		}
 		//CODEGEN: generate JPC code.
 		int tempAddress = codeBaseSize; //get the address of the conditional jump
 		codeGen(JPC,0,0); 
 		codeGen(INC,0,-1); //decrease the top of the stack.
 		gettok();
 		statement();
 		codeGen(JMP,0,conditionAddress); //create a JMP to JMP back to the condition.
        	changeMvalue(tempAddress,codeBaseSize); //change the jumpconditional to whatever the current address is so we can check the condition again and loop
 		
 	}
 }
 void condition()
 {
 	if (accept(oddsym))
 	{
 		gettok();
 		expression();
 		//CODEGEN: odd operation
 		codeGen(OPR,0,6);
 	}
 	else
 	{
 		expression(); //this is the first operand
 		token_type tempToken = crntToken; //CODEGEN: save this so we can gen the code for it (this is the relation)
 		if (!accept(eqsym) && !accept(neqsym) && !accept(lessym) && !accept(leqsym) && !accept(gtrsym) && !accept(geqsym))
 		{
 			error(20); //not a relation            
 		}
 		gettok();
 		expression(); //second operand

 		switch (tempToken) //CODEGEN: generate code for conditional
 		{
 			case (eqsym):
 				codeGen(OPR,0,8);
 				break;
 			case (neqsym):
 				codeGen(OPR,0,9);
 				break;
 			case (lessym):
 				codeGen(OPR,0,10);
 				break;
 			case (leqsym):
 				codeGen(OPR,0,11);
 				break;				
 			case (gtrsym):
 				codeGen(OPR,0,12);
 				break;				
 			case (geqsym):
 				codeGen(OPR,0,13);
 				break;				
 				default: error(20); break;
 		}
 		
 	}
    
 }
 void expression()
 {
 	if (accept(plussym) || accept(minussym))
 	{
 		gettok();
 		
 		if (accept(minussym))
 		{
            		codeGen(OPR,0,1); //CODEGEN: make the current top of the stack negative
 		}
 	}
 	if (accept(slashsym) || accept(multsym)) //expressions cannot begin with these
 	{
 	     	error(24);
 	}       
 	term();         
 	while (accept(plussym) || accept(minussym))
 	{	      
 		if (accept(plussym))
 		{  
 			   gettok();
 			   if (accept(procsym))
 			   {
 			   	  error(21);
 			   }
 		   term();

 		   codeGen(OPR,0,2); //CODEGEN: addition
 		}
 		else if (accept(minussym))
 		{
 			   gettok();
 			   if (accept(procsym))
 			   {
 			   	  error(21);
 			   }
 		   term();
 		     
 		   codeGen(OPR,0,3); //CODEGEN: subtraction
 		}		
 	}
 	
 	
 }
 void term()
 {
    factor();
 	while (accept(multsym) || accept(slashsym))
 	{	
        if (accept(multsym))
        { 
            gettok();
     		factor();
 			codeGen(OPR,0,4); //CODEGEN: multiplication
        }	
        else if (accept(slashsym))
        {
            gettok();
     		factor();             
 			codeGen(OPR,0,5); //CODEGEN: division
        }		
 	}
 }

 //can be an identifier, a number, or an expression
 void factor()
 {
 	if (accept(identsym))
 	{
 		char* tempIdentName = tokenList[crntTokenListIndex-1].name;
 		symbol s = findSymbol(tempIdentName,crntLevel);
 		if (s.kind == 1) //CODEGEN: constant
 		{
 			codeGen(LIT,0,atoi(s.val));
 		}
 		else if (s.kind == 2) //CODEGEN: var
 		{
 			codeGen(LOD,crntLevel - s.level,s.addr); //CODEGEN: load the token given by the current identifier
 		}
 		gettok();
 	}
 	else if(accept(numbersym)) 
 	{
 		codeGen(LIT,0,atoi(tokenList[crntTokenListIndex - 1].name)); //CODEGEN: load the number associated with this token as a literal
                                                                     //we have to decrement the index because getTok already incremented it.
                                                                     
 		gettok();
 	}
 	else if (accept(lparentsym)) //expression inside parentheses
 	{
 		gettok();
 		expression();
 		if (!accept(rparentsym))
 		{
 			error(22); //no rparen error
 		}
 		gettok();
 	}
 	else
 	{
 		error(23);
 	}
 }

 /**
 generates assembly code for a specified operation, M, and L value. Adds it to instruction list.
 */
 void codeGen(operation opr, int L, int M)
 {
 	instruction temp;
 	temp.OP = opr;
 	temp.L = L;
    	temp.M = M;
 	codeBase[codeBaseSize] = temp; //the code    
 	codeBaseSize++;
 }

 /*
   change M value of code at address "address" to "M"
 */
 void changeMvalue(int address, int M)
 {
     codeBase[address].M = M;
 }

 void printCodeBase()
 {
 	outputFile = fopen("codegenoutput.txt","w");
 	int i;
 	for (i = 0; i < codeBaseSize; i++)
 	{
 		fprintf(outputFile,"%d %d %d\n",codeBase[i].OP,codeBase[i].L, codeBase[i].M);
    	printf("%d %d %d\n",codeBase[i].OP,codeBase[i].L, codeBase[i].M);
 	}
 }



 /*
 P-Code translation machine
 By:	Stephen Bryant
 
 Thursday, September 02, 2010.
 */ 


 void pmachine(void)
 {
 	//open files
 	inputFile = fopen("codegenoutput.txt","r");
 	outputFile = fopen("pmachineoutput.txt","w");
 	
 	/*Init stack*/
 	stack[1] = 0;
 	stack[2] = 0;
 	stack[3] = 0;
 	
 	//temp struct
 	instruction temp;
 	codeBaseSize = 0;
 	
 	//print header
 	fprintf(outputFile,"%s\t%s\t%s\t%s\n","Line","OP","L","M");
 	
 	while (fscanf(inputFile,"%d %d %d",((int)(codeBase[codeBaseSize].OP)),&(codeBase[codeBaseSize].L),&(codeBase[codeBaseSize].M)) != EOF) //read input.txt into codebase
 	{ 
 		printCodeLine(codeBaseSize); //print current line being read
 		fprintf(outputFile,"\n");
 		codeBaseSize++;	
 	}	
 	
 	//header for execution
 	fprintf(outputFile,"\n\n\n");
 	fprintf(outputFile,"\t\t\t\t\tPC\tBP\tSP\tStack\n");
 	fprintf(outputFile,"Initial Values\t\t\t\t%d\t%d\t%d\t",PC,BP,SP);
 	printStack(BP,SP);
 	fprintf(outputFile,"\n");
 	
 	//loop fetch/execute
 	int flag = 0;
 	do
 	{
 		
 		printCodeLine(PC);	
 		fetch();
 		//we're done executing after this loop
 		if (IR->OP == 2 && IR->M  == 0 && (BP == 1))
 		{
 			flag = 1;
 		}
 		execute();
 		
 		fprintf(outputFile,"\t%d\t%d\t%d\t",PC,BP,SP);
 		printStack(BP,SP);
 		fprintf(outputFile,"\n");
 		
 	} while(flag == 0); //if this were true, it would mean the program is returning and there is nothing left on the stack
 	
 	fclose(inputFile);
 	fclose(outputFile);
 	
 	
 }


 void printCodeLine(int i)
 {
 	fprintf(outputFile,"%d\t", i); //print line number
 	switch (codeBase[i].OP)
 	{
 		case 1: //LIT: push literal "M" to top of the stack
 			fprintf(outputFile,"lit\t");
 			break;
 		case 2: //OPR: perform operation
 			fprintf(outputFile,"opr\t");
 			break;
 		case 3: //LOD: load item at offset M, L levels down to the top of the stack.
 			fprintf(outputFile,"lod\t");
 			break;
 		case 4: //STO: store item at offset M, L levels down from the top of the stack.
 			fprintf(outputFile,"sto\t");
 			break;
 		case 5: //CAL: create a new frame.
 			fprintf(outputFile,"cal\t");
 			break;
 		case 6: //INC: allocate M locals (first 3 are SL, DL, RA)
 			fprintf(outputFile,"inc\t");
 			break;
 		case 7: //JMP: jump to line M of the code base
 			fprintf(outputFile,"jmp\t");
 			break;
 		case 8: //JPC: jump to line M of the code base if the item on top of the stack is 0
 			fprintf(outputFile,"jpc\t");
 			break;
 		case 9: //SOI: print the top of the stack
 			fprintf(outputFile,"soi\t");
 			break;
 		case 10: //SIO: store input to the stack at point SP. 
 			fprintf(outputFile,"sio\t");
 			break;
 		default:
 			break;
 	}
 	fprintf(outputFile,"%d\t%d\t",codeBase[i].L,codeBase[i].M);
 }

 //print entire populated stack.
 void printStack(int tempBP, int tempSP)
 {
 	int i;
 	if (tempBP == 0)
 	{
 		return;
 	}
 	else
 	{
 		printStack(stack[tempBP + 1],tempBP-1);
 		if (tempBP != 1)
 		{
 			fprintf(outputFile,"| ");
 		}
 		if (tempBP == tempSP + 1) //this would be if someone used "CAL" recently, the stack pointer would not have incremented and the new AR would need to show up.
 		{
 			for (i = tempBP; i < tempBP + 3; i++)
 			{
 				fprintf(outputFile,"%d ",stack[i]);
 			}
 			
 		}
 		else //this is a normal AR without a recent "CAL"
 		{
 			for (i = tempBP; i <= tempSP; i++)
 			{
 				fprintf(outputFile,"%d ",stack[i]);
 			}
 		}
 	}
 }


 //fetch cycle, retrieves code from codebase to IR
 void fetch()
 {
 	IR = &codeBase[PC];
 	PC++;
 }

 //execute cycle, executes code in IR
 void execute()
 {
 	int op = IR->OP;
 	switch (op)
 	{
 		case 1: //LIT: push literal "M" to top of the stack
 			SP++;
 			stack[SP] = IR->M;
 			break;
 		case 2: //OPR: perform operation
 			
 			switch(IR->M)
 		{
 			case 0: //RTN: return operation
 				SP = BP - 1; //top of the stack is the base pointer of the current frame - 1 (since the frame is deleted)
 				PC = stack[SP + 3]; //new program counter is the return address from the frame
 				BP = stack[SP + 2]; //new base pointer is the dynamic link from the current frame
 				break;
 			case 1: //NEG: negative of the top of the stack
 				stack[SP] = - stack[SP];
 				break;
 			case 2: //ADD: add top two fields, store result on the top of the stack
 				SP--;
 				stack[SP] = stack[SP] + stack[SP+1]; 
 				break;
 			case 3: //SUB: subtract top field from the second field, store result on top of the stack
 				SP--;
 				stack[SP] = stack[SP] - stack[SP+1];
 				break;
 			case 4: //MUL: multiply top two items, store result on the top of the stack
 				SP--;
 				stack[SP] = stack[SP] * stack[SP+1];
 				break;
 			case 5: //DIV: divide second field by top field, store result
 				SP--;
 				stack[SP] = stack[SP] / stack[SP+1];
 				break;
 			case 6: //ODD: returns true if the top field is odd
 				stack[SP] = stack[SP] % 2;
 				break;
 			case 7: //MOD: mod the field one level down by the field on top of the stack, store on top of the stack.
 				SP--;
 				stack[SP] = stack[SP] % stack[SP+1];
 				break;
 			case 8:  //EQL: whether the top two fields are equal, adds a boolean value to the top of the stack.
 				SP--;
 				stack[SP] = (stack[SP] == stack[SP+1]);
 				break;
 			case 9: //NEQ: when the top two fields are not equal, adds a boolean value true to the top of the stack, or false.
 				SP--;
 				stack[SP] = !(stack[SP] == stack[SP+1]);
 			case 10: //LSS: adds to the top of the stack true if the first argument is less than the second argument.
 				SP--;
 				stack[SP] = stack[SP] < stack[SP+1];
 				break;
 			case 11: //LEQ: adds to the top of the stack true if the first argument is less than or equal to the second argument.
 				SP--;
 				stack[SP] = stack[SP] <= stack[SP+1];
 				break;
 			case 12:  //GTR: adds to the top of the stack true if the first argument is greater than the second argument.
 				SP--;
 				stack[SP] = stack[SP] > stack[SP+1];
 				break;
 			case 13: //GEQ: adds to the top of the stack "true" if the first argument is greater than or equal to the second argument.
 				SP--;
 				stack[SP] = stack[SP] >= stack[SP+1];
 				break;
 			default:
 				exit(0); //error
 				break;			
 		}
 			break;
 			
 		case 3: //LOD: load item at offset M, L levels down to the top of the stack.
 			SP++;
 			stack[SP] = stack[base(IR->L,BP) + IR->M];
 			break;
 		case 4: //STO: store item at offset M, L levels down from the top of the stack.
 			stack[base(IR->L,BP) + IR->M] = stack[SP];
 			SP--;
 			break;
 		case 5: //CAL: create a new frame.
 			stack[SP + 1] = base(IR->L,BP); //static link
 			stack[SP + 2] = BP; //dynamic link: points to previous stack frame
 			stack[SP + 3] = PC; //Address in code of next code segment
 			BP = SP + 1; //base pointer to the bottom of the new frame
 			PC = IR->M;
 			break;
 		case 6: //INC: allocate M locals (first 3 are SL, DL, RA)
 			SP += IR->M;
 			break;
 		case 7: //JMP: jump to line M of the code base
 			PC = IR->M;
 			break;
 		case 8: //JPC: jump to line M of the code base if the item on top of the stack is 0
 			if (stack[SP] == 0)
 			{
 				PC = IR->M;
 				SP--;
 			}
 			break;
 		case 9: //SOI: print the top of the stack
 			fprintf(outputFile,"\nTop Of the stack:%d",stack[SP]);
 			SP--;
 			break;
 		case 10: //SIO: store input to the stack at point SP. 
 			fscanf(inputFile,"%d ",&stack[SP]);
 			SP++;
 			break;
 		default:
 			break;
 	}
 }

 /**********************************************/
 /* Find base L levels down */
 /* */
 /**********************************************/
 int base(int l,int base) // l stand for L in the instruction format
 {
 	int b1; //find base L levels down
 	b1 = base;
 	while (l > 0)
 	{
 		b1 = stack[b1]; //base pointer equals the current base pointer (which points down to the previous frame's base)
 		l--;
 	}
 	return b1;
 }