Created
October 23, 2014 04:41
-
-
Save moonwatcher/285cec71d15eddb8356b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Operating Systems Fall 2013, Lab 1. 23 september 2013 | |
| Lior Galanti lior.galanti@nyu.edu N14314920 | |
| */ | |
| #define BUFFER_SIZE 2048 | |
| #define SYMBOL_SIZE 16 | |
| #define INSTR_SIZE 16 | |
| #define MEM_ADDR 16 | |
| #define DEFLIST_SIZE 16 | |
| #define USELIST_SIZE 16 | |
| #define MACHINE_MEM 512 | |
| #define SYMBOL_TABLE_SIZE 256 | |
| #define ERROR_MESSAGE_SIZE 256 | |
| typedef int bool; | |
| #define true 1 | |
| #define false 0 | |
| #include <stdlib.h> | |
| #include <stdio.h> | |
| #include <string.h> | |
| #include <ctype.h> | |
| typedef struct { | |
| int line; | |
| int start; | |
| int end; | |
| int length; | |
| char* value; | |
| } token_t; | |
| typedef struct { | |
| char* name; | |
| int value; | |
| char* error; | |
| int used; | |
| int module; | |
| } symbol_table_entry_t; | |
| typedef struct { | |
| char** symbols; | |
| int* usage; | |
| int symbolcount; | |
| int base; | |
| int size; | |
| } module_t; | |
| typedef struct { | |
| int tokencount; | |
| int symbolcount; | |
| int modulecount; | |
| token_t* tokens; | |
| module_t* modules; | |
| int last_line; | |
| int last_char; | |
| symbol_table_entry_t symbol_table[SYMBOL_TABLE_SIZE]; | |
| } program_t; | |
| typedef enum {DEFINITION, USE, PROGRAM} parse_state_t; | |
| // function declerations | |
| void deallocate(program_t* program); | |
| void __parseerror(int errcode, int linenum, int lineoffset); | |
| program_t* read_tokens(char* filename); | |
| int get_symbol_index_in_table(program_t* program, char* symbol); | |
| int get_symbol_index_in_module(module_t module, char* symbol); | |
| bool token_is_numeric(token_t token); | |
| int token_is_symbol(token_t token); | |
| bool token_is_instruction_type(token_t token); | |
| bool token_is_instruction(token_t token); | |
| void pass_one(program_t* program); | |
| void pass_two(program_t* program); | |
| void deallocate(program_t* program); | |
| // code... | |
| void __parseerror(int errcode, int linenum, int lineoffset) { | |
| static char* errstr[] = { | |
| "NUM_EXPECTED", | |
| "SYM_EXPECTED", | |
| "ADDR_EXPECTED", | |
| "SYM_TOLONG", | |
| "TO_MANY_DEF_IN_MODULE", | |
| "TO_MANY_USE_IN_MODULE", | |
| "TO_MANY_SYMBOLS", | |
| "TO_MANY_INSTR", | |
| }; | |
| printf("Parse Error line %d offset %d: %s\n", linenum, lineoffset, errstr[errcode]); | |
| } | |
| program_t* read_tokens(char* filename) { | |
| program_t* program; | |
| program = (program_t*) malloc(sizeof(program_t)); | |
| (*program).tokencount = 0; | |
| (*program).symbolcount = 0; | |
| (*program).last_line = 0; | |
| (*program).last_char = 0; | |
| token_t tokens[MACHINE_MEM]; | |
| char buffer[BUFFER_SIZE]; | |
| FILE *fp; | |
| fp = fopen(filename, "r"); | |
| if (fp == NULL) { | |
| printf("Error opening file\n"); | |
| } else { | |
| int linep = 0, token_start; | |
| while(fgets(buffer, BUFFER_SIZE, fp) != NULL) { | |
| (*program).last_line = linep; | |
| int charp = 0; | |
| bool in_token = false; | |
| for (charp = 0; buffer[charp] != 0; charp++){ | |
| (*program).last_char = charp; | |
| if (buffer[charp] == ' ' || buffer[charp] == '\n' || buffer[charp] == '\t') { | |
| if (in_token) { | |
| in_token = false; | |
| tokens[(*program).tokencount].line = linep; | |
| tokens[(*program).tokencount].start = token_start; | |
| tokens[(*program).tokencount].end = charp; | |
| tokens[(*program).tokencount].length = charp - token_start; | |
| tokens[(*program).tokencount].value = strndup(buffer + token_start, charp - token_start); | |
| (*program).tokencount++; | |
| } | |
| } else { | |
| if (!in_token) { | |
| in_token = true; | |
| token_start = charp; | |
| } | |
| } | |
| } | |
| linep++; | |
| } | |
| fclose(fp); | |
| (*program).tokens = (token_t*) malloc(sizeof(token_t) * (*program).tokencount); | |
| int i; | |
| for(i=0;i<(*program).tokencount;i++){ | |
| (*program).tokens[i].line = tokens[i].line; | |
| (*program).tokens[i].start = tokens[i].start; | |
| (*program).tokens[i].end = tokens[i].end; | |
| (*program).tokens[i].length = tokens[i].length; | |
| (*program).tokens[i].value = tokens[i].value; | |
| } | |
| } | |
| return program; | |
| } | |
| int get_symbol_index_in_table(program_t* program, char* symbol) { | |
| // I know, this is horrible... | |
| // a Hashtable would have made so much more sense. | |
| int result = -1; | |
| int i =0; | |
| for(i=0; i<(*program).symbolcount; i++){ | |
| if(strcmp((*program).symbol_table[i].name, symbol) == 0) { | |
| result = i; | |
| break; | |
| } | |
| } | |
| return result; | |
| } | |
| int get_symbol_index_in_module(module_t module, char* symbol) { | |
| // Not as horrible but still needs a hushtable | |
| int result = -1; | |
| int i =0; | |
| for(i=0; i<module.symbolcount; i++){ | |
| if(strcmp(module.symbols[i], symbol) == 0) { | |
| result = i; | |
| break; | |
| } | |
| } | |
| return result; | |
| } | |
| bool token_is_numeric(token_t token) { | |
| bool result = true; | |
| int i; | |
| for(i=0;i<token.length;i++) { | |
| result = result && isdigit(token.value[i]); | |
| } | |
| return result; | |
| } | |
| int token_is_symbol(token_t token) { | |
| // 1 means yes | |
| // 0 means no | |
| // -2 means token is too long | |
| int result = 1; | |
| if(token.length <= SYMBOL_SIZE) { | |
| if(isalpha(token.value[0])) { | |
| if(token.length > 1) { | |
| int i; | |
| for(i=1;i<token.length;i++) { | |
| result = result && (isalpha(token.value[i]) || isdigit(token.value[i])); | |
| } | |
| } | |
| } else { | |
| result = 0; | |
| } | |
| } else { | |
| result = -2; | |
| } | |
| return result; | |
| } | |
| bool token_is_instruction_type(token_t token) { | |
| bool result = true; | |
| if(token.length == 1) { | |
| if(!(token.value[0] == 'I' || token.value[0] == 'A' || token.value[0] == 'R' || token.value[0] == 'E')){ | |
| result = false; | |
| } | |
| } else { | |
| result = false; | |
| } | |
| return result; | |
| } | |
| bool token_is_instruction(token_t token) { | |
| bool result = true; | |
| if(!token_is_numeric(token)) { | |
| result = false; | |
| } | |
| return result; | |
| } | |
| void pass_one(program_t* program) { | |
| parse_state_t parse_state = DEFINITION; | |
| int token_index = 0; | |
| int total_instr = 0; | |
| int total_symbol = 0; | |
| int module_base = 0; | |
| int module_index = 0; | |
| module_t modules[MACHINE_MEM]; | |
| while(token_index < (*program).tokencount){ | |
| switch (parse_state) { | |
| case DEFINITION: | |
| // first token should be defcount | |
| if(token_is_numeric((*program).tokens[token_index])){ | |
| int defcount = atoi((*program).tokens[token_index].value); | |
| int defcount_index = token_index; | |
| if(defcount > DEFLIST_SIZE) { | |
| __parseerror(4, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1); | |
| deallocate(program); | |
| exit(0); | |
| } | |
| // now we have to verify we have exactly defcount definition pairs | |
| token_index++; | |
| while(token_index <= defcount_index + defcount * 2) { | |
| char* symbol_name; | |
| int symbol_value; | |
| int symbol_index; | |
| if(!(token_index < (*program).tokencount)){ | |
| // if we run out of tokens before we are done then we are missing a symbol | |
| __parseerror(1, (*program).last_line + 1, (*program).last_char + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } else if(token_is_symbol((*program).tokens[token_index]) == 0){ | |
| __parseerror(1, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } else if(token_is_symbol((*program).tokens[token_index]) == -2){ | |
| __parseerror(3, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } | |
| symbol_name = (*program).tokens[token_index].value; | |
| token_index++; | |
| if(!(token_index < (*program).tokencount)){ | |
| // if we run out of tokens before we are done then we are missing a number | |
| __parseerror(0, (*program).last_line + 1, (*program).last_char + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } else if(!token_is_numeric((*program).tokens[token_index])){ | |
| __parseerror(0, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } | |
| symbol_value = atoi((*program).tokens[token_index].value) + module_base; | |
| symbol_index = get_symbol_index_in_table(program, symbol_name); | |
| if(symbol_index < 0) { | |
| (*program).symbol_table[(*program).symbolcount].name = symbol_name; | |
| (*program).symbol_table[(*program).symbolcount].value = symbol_value; | |
| (*program).symbol_table[(*program).symbolcount].error = ""; | |
| (*program).symbol_table[(*program).symbolcount].used = 0; | |
| (*program).symbol_table[(*program).symbolcount].module = module_index; | |
| (*program).symbolcount++; | |
| } else { | |
| (*program).symbol_table[symbol_index].error = " Error: This variable is multiple times defined; first value used"; | |
| } | |
| token_index++; | |
| } | |
| } else { | |
| // we are missing the defcount | |
| __parseerror(0, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } | |
| parse_state = USE; | |
| break; | |
| case USE: | |
| // first token should be usecount | |
| if(token_is_numeric((*program).tokens[token_index])){ | |
| int usecount = atoi((*program).tokens[token_index].value); | |
| int usecount_index = token_index; | |
| if(usecount > DEFLIST_SIZE) { | |
| __parseerror(5, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } | |
| // prepare a module struct | |
| modules[module_index].symbolcount = 0; | |
| modules[module_index].symbols = (char**) malloc(sizeof(char) * SYMBOL_SIZE * usecount); | |
| modules[module_index].usage = (int*) malloc(sizeof(int) * usecount); | |
| // initiallize the usage counter | |
| int i; | |
| for(i=0;i<usecount;i++) { | |
| modules[module_index].usage[i] = 0; | |
| } | |
| // now we have to verify we have exactly usecount symbols | |
| token_index++; | |
| while(token_index <= usecount_index + usecount) { | |
| if(!(token_index < (*program).tokencount)){ | |
| // if we run out of tokens before we are done then we are missing a symbol | |
| __parseerror(1, (*program).last_line + 1, (*program).last_char + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } else if (token_is_symbol((*program).tokens[token_index]) == 0){ | |
| __parseerror(1, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } else if(token_is_symbol((*program).tokens[token_index]) == -2){ | |
| __parseerror(3, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } else { | |
| modules[module_index].symbols[modules[module_index].symbolcount] = (*program).tokens[token_index].value; | |
| modules[module_index].symbolcount++; | |
| } | |
| token_index++; | |
| } | |
| } else { | |
| // we are missing the usecount | |
| __parseerror(0, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } | |
| parse_state = PROGRAM; | |
| break; | |
| case PROGRAM: | |
| // first token should be codecount | |
| if(token_is_numeric((*program).tokens[token_index])){ | |
| int codecount = atoi((*program).tokens[token_index].value); | |
| int codecount_index = token_index; | |
| modules[module_index].size = codecount; | |
| modules[module_index].base = module_base; | |
| module_base += codecount; | |
| total_instr += codecount; | |
| if(total_instr > MACHINE_MEM) { | |
| __parseerror(7, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } | |
| // now we have to verify we have exactly codecount (type,intr) pairs | |
| token_index++; | |
| while(token_index <= codecount_index + codecount * 2) { | |
| if(!(token_index < (*program).tokencount)){ | |
| // if we run out of tokens before we are done then we are missing an instruction type | |
| __parseerror(2, (*program).last_line + 1, (*program).last_char + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } else if(!token_is_instruction_type((*program).tokens[token_index])){ | |
| __parseerror(2, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } | |
| token_index++; | |
| if(!(token_index < (*program).tokencount)){ | |
| // if we run out of tokens before we are done then we are missing an instruction | |
| __parseerror(2, (*program).last_line + 1, (*program).last_char + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } else if(!token_is_instruction((*program).tokens[token_index])){ | |
| __parseerror(2, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } | |
| token_index++; | |
| } | |
| } else { | |
| //we are missing codecount | |
| __parseerror(0, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1); | |
| void deallocate(program_t* program); | |
| exit(0); | |
| } | |
| module_index++; | |
| parse_state = DEFINITION; | |
| break; | |
| } | |
| } | |
| // populate the program struct with information about the modules | |
| (*program).modulecount = module_index; | |
| (*program).modules = (module_t*) malloc(sizeof(module_t) * (*program).modulecount); | |
| int i; | |
| for(i=0;i<(*program).modulecount;i++){ | |
| (*program).modules[i].symbols = modules[i].symbols; | |
| (*program).modules[i].symbolcount = modules[i].symbolcount; | |
| (*program).modules[i].base = modules[i].base; | |
| (*program).modules[i].size = modules[i].size; | |
| (*program).modules[i].usage = modules[i].usage; | |
| } | |
| for(i=0; i<(*program).symbolcount; i++){ | |
| module_t module = (*program).modules[(*program).symbol_table[i].module]; | |
| if((*program).symbol_table[i].value - module.base > module.size - 1) { | |
| printf("Warning: Module %d: %s to big %d (max=%d) assume zero relative\n", | |
| (*program).symbol_table[i].module + 1, | |
| (*program).symbol_table[i].name, | |
| (*program).symbol_table[i].value, | |
| module.size - 1 | |
| ); | |
| (*program).symbol_table[i].value = 0; | |
| } | |
| } | |
| //print the symbol table | |
| printf("Symbol Table\n"); | |
| for(i=0; i<(*program).symbolcount; i++){ | |
| printf("%s=%d%s\n", (*program).symbol_table[i].name, (*program).symbol_table[i].value, (*program).symbol_table[i].error); | |
| } | |
| } | |
| void pass_two(program_t* program) { | |
| parse_state_t parse_state = DEFINITION; | |
| int token_index = 0; | |
| int module_index = 0; | |
| int memory_address = 0; | |
| int defcount, usecount, codecount; | |
| printf("\nMemory Map\n"); | |
| while(token_index < (*program).tokencount){ | |
| switch (parse_state) { | |
| case DEFINITION: | |
| // on the second run we are not interested in the definitions, lets just skip them | |
| defcount = atoi((*program).tokens[token_index].value); | |
| token_index += defcount * 2 + 1; | |
| parse_state = USE; | |
| break; | |
| case USE: | |
| // we are not interested in the use list either... | |
| usecount = atoi((*program).tokens[token_index].value); | |
| token_index += usecount + 1; | |
| parse_state = PROGRAM; | |
| break; | |
| case PROGRAM: | |
| codecount = atoi((*program).tokens[token_index].value); | |
| int codecount_index = token_index; | |
| module_t module = (*program).modules[module_index]; | |
| token_index++; | |
| while(token_index <= codecount_index + codecount * 2) { | |
| char mem[MEM_ADDR]; | |
| char instr[INSTR_SIZE]; | |
| sprintf(mem, "%.03d", memory_address); | |
| char instr_type = (*program).tokens[token_index].value[0]; | |
| token_index++; | |
| int instrn = atoi((*program).tokens[token_index].value); | |
| if ((*program).tokens[token_index].length <= 4) { | |
| sprintf(instr, "%.04d", instrn); | |
| } | |
| if(instr_type == 'I'){ | |
| if (instrn > 9999) { | |
| printf("%s: 9999 Error: Illegal immediate value; treated as 9999\n", mem); | |
| } else { | |
| printf("%s: %s\n", mem, instr); | |
| } | |
| } else if(instr_type == 'A'){ | |
| char opcode = instr[0]; | |
| int operand = atoi(instr+1); | |
| char* error = ""; | |
| if (operand >= MACHINE_MEM) { | |
| operand = 0; | |
| error = " Error: Absolute address exceeds machine size; zero used"; | |
| } | |
| char operands[INSTR_SIZE]; | |
| sprintf(operands, "%.04d", operand); | |
| operands[0] = opcode; | |
| printf("%s: %s%s\n", mem, operands, error); | |
| } else if(instr_type == 'R'){ | |
| if ((*program).tokens[token_index].length > 4) { | |
| printf("%s: 9999 Error: Illegal opcode; treated as 9999\n", mem); | |
| } else { | |
| char opcode = instr[0]; | |
| int operand = atoi(instr+1); | |
| char* error = ""; | |
| if (operand > module.size) { | |
| operand = 0; | |
| error = " Error: Relative address exceeds module size; zero used\n"; | |
| } | |
| operand += module.base; | |
| char operands[INSTR_SIZE]; | |
| sprintf(operands, "%.04d", operand); | |
| operands[0] = opcode; | |
| printf("%s: %s%s\n", mem, operands, error); | |
| } | |
| } else if(instr_type == 'E'){ | |
| if ((*program).tokens[token_index].length > 4) { | |
| printf("%s: 9999 Error: Illegal opcode; treated as 9999\n", mem); | |
| } else { | |
| char opcode = instr[0]; | |
| int operand = atoi(instr+1); | |
| if (operand >= module.symbolcount){ | |
| printf("%s: %s Error: External address exceeds length of uselist; treated as immediate\n", mem, instr); | |
| } else { | |
| char* symbol = module.symbols[operand]; | |
| int symbol_index = get_symbol_index_in_table(program, symbol); | |
| char error[ERROR_MESSAGE_SIZE]; | |
| int symbol_module_index = get_symbol_index_in_module(module, symbol); | |
| module.usage[symbol_module_index]++; | |
| if(symbol_index >= 0) { | |
| (*program).symbol_table[symbol_index].used++; | |
| memset(error, 0, sizeof error); | |
| operand = (*program).symbol_table[symbol_index].value; | |
| } else { | |
| sprintf(error, " Error: %s is not defined; zero used", symbol); | |
| operand = 0; | |
| } | |
| char operands[INSTR_SIZE]; | |
| sprintf(operands, "%.04d", operand); | |
| operands[0] = opcode; | |
| printf("%s: %s%s\n", mem, operands, error); | |
| } | |
| } | |
| } | |
| token_index++; | |
| memory_address++; | |
| } | |
| module_index++; | |
| parse_state = DEFINITION; | |
| break; | |
| } | |
| } | |
| int i, j; | |
| for(i=0; i<(*program).symbolcount; i++){ | |
| if((*program).symbol_table[i].used <= 0) { | |
| printf("Warning: %s was defined in module %d but never used\n", (*program).symbol_table[i].name, (*program).symbol_table[i].module +1); | |
| } | |
| } | |
| for(i=0;i<(*program).modulecount;i++){ | |
| for(j=0;j<(*program).modules[i].symbolcount;j++){ | |
| if ((*program).modules[i].usage[j] <= 0){ | |
| printf("Warning: In Module %d %s appeared in the uselist but was not actually used\n", i+1, (*program).modules[i].symbols[j]); | |
| } | |
| } | |
| } | |
| } | |
| void deallocate(program_t* program) { | |
| int i; | |
| for(i=0;i<(*program).modulecount;i++){ | |
| free((*program).modules[i].symbols); | |
| free((*program).modules[i].usage); | |
| } | |
| free(program); | |
| } | |
| int main(int argc,char *argv[]) { | |
| program_t* program; | |
| program = read_tokens(argv[1]); | |
| pass_one(program); | |
| pass_two(program); | |
| // free memory | |
| deallocate(program); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment