Skip to content

Instantly share code, notes, and snippets.

@moonwatcher
Created October 23, 2014 04:41
Show Gist options
  • Select an option

  • Save moonwatcher/285cec71d15eddb8356b to your computer and use it in GitHub Desktop.

Select an option

Save moonwatcher/285cec71d15eddb8356b to your computer and use it in GitHub Desktop.
/*
Operating Systems Fall 2013, Lab 1. 23 september 2013
Lior Galanti lior.galanti@nyu.edu N14314920
*/
#define BUFFER_SIZE 2048
#define SYMBOL_SIZE 16
#define INSTR_SIZE 16
#define MEM_ADDR 16
#define DEFLIST_SIZE 16
#define USELIST_SIZE 16
#define MACHINE_MEM 512
#define SYMBOL_TABLE_SIZE 256
#define ERROR_MESSAGE_SIZE 256
typedef int bool;
#define true 1
#define false 0
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
typedef struct {
int line;
int start;
int end;
int length;
char* value;
} token_t;
typedef struct {
char* name;
int value;
char* error;
int used;
int module;
} symbol_table_entry_t;
typedef struct {
char** symbols;
int* usage;
int symbolcount;
int base;
int size;
} module_t;
typedef struct {
int tokencount;
int symbolcount;
int modulecount;
token_t* tokens;
module_t* modules;
int last_line;
int last_char;
symbol_table_entry_t symbol_table[SYMBOL_TABLE_SIZE];
} program_t;
typedef enum {DEFINITION, USE, PROGRAM} parse_state_t;
// function declerations
void deallocate(program_t* program);
void __parseerror(int errcode, int linenum, int lineoffset);
program_t* read_tokens(char* filename);
int get_symbol_index_in_table(program_t* program, char* symbol);
int get_symbol_index_in_module(module_t module, char* symbol);
bool token_is_numeric(token_t token);
int token_is_symbol(token_t token);
bool token_is_instruction_type(token_t token);
bool token_is_instruction(token_t token);
void pass_one(program_t* program);
void pass_two(program_t* program);
void deallocate(program_t* program);
// code...
void __parseerror(int errcode, int linenum, int lineoffset) {
static char* errstr[] = {
"NUM_EXPECTED",
"SYM_EXPECTED",
"ADDR_EXPECTED",
"SYM_TOLONG",
"TO_MANY_DEF_IN_MODULE",
"TO_MANY_USE_IN_MODULE",
"TO_MANY_SYMBOLS",
"TO_MANY_INSTR",
};
printf("Parse Error line %d offset %d: %s\n", linenum, lineoffset, errstr[errcode]);
}
program_t* read_tokens(char* filename) {
program_t* program;
program = (program_t*) malloc(sizeof(program_t));
(*program).tokencount = 0;
(*program).symbolcount = 0;
(*program).last_line = 0;
(*program).last_char = 0;
token_t tokens[MACHINE_MEM];
char buffer[BUFFER_SIZE];
FILE *fp;
fp = fopen(filename, "r");
if (fp == NULL) {
printf("Error opening file\n");
} else {
int linep = 0, token_start;
while(fgets(buffer, BUFFER_SIZE, fp) != NULL) {
(*program).last_line = linep;
int charp = 0;
bool in_token = false;
for (charp = 0; buffer[charp] != 0; charp++){
(*program).last_char = charp;
if (buffer[charp] == ' ' || buffer[charp] == '\n' || buffer[charp] == '\t') {
if (in_token) {
in_token = false;
tokens[(*program).tokencount].line = linep;
tokens[(*program).tokencount].start = token_start;
tokens[(*program).tokencount].end = charp;
tokens[(*program).tokencount].length = charp - token_start;
tokens[(*program).tokencount].value = strndup(buffer + token_start, charp - token_start);
(*program).tokencount++;
}
} else {
if (!in_token) {
in_token = true;
token_start = charp;
}
}
}
linep++;
}
fclose(fp);
(*program).tokens = (token_t*) malloc(sizeof(token_t) * (*program).tokencount);
int i;
for(i=0;i<(*program).tokencount;i++){
(*program).tokens[i].line = tokens[i].line;
(*program).tokens[i].start = tokens[i].start;
(*program).tokens[i].end = tokens[i].end;
(*program).tokens[i].length = tokens[i].length;
(*program).tokens[i].value = tokens[i].value;
}
}
return program;
}
int get_symbol_index_in_table(program_t* program, char* symbol) {
// I know, this is horrible...
// a Hashtable would have made so much more sense.
int result = -1;
int i =0;
for(i=0; i<(*program).symbolcount; i++){
if(strcmp((*program).symbol_table[i].name, symbol) == 0) {
result = i;
break;
}
}
return result;
}
int get_symbol_index_in_module(module_t module, char* symbol) {
// Not as horrible but still needs a hushtable
int result = -1;
int i =0;
for(i=0; i<module.symbolcount; i++){
if(strcmp(module.symbols[i], symbol) == 0) {
result = i;
break;
}
}
return result;
}
bool token_is_numeric(token_t token) {
bool result = true;
int i;
for(i=0;i<token.length;i++) {
result = result && isdigit(token.value[i]);
}
return result;
}
int token_is_symbol(token_t token) {
// 1 means yes
// 0 means no
// -2 means token is too long
int result = 1;
if(token.length <= SYMBOL_SIZE) {
if(isalpha(token.value[0])) {
if(token.length > 1) {
int i;
for(i=1;i<token.length;i++) {
result = result && (isalpha(token.value[i]) || isdigit(token.value[i]));
}
}
} else {
result = 0;
}
} else {
result = -2;
}
return result;
}
bool token_is_instruction_type(token_t token) {
bool result = true;
if(token.length == 1) {
if(!(token.value[0] == 'I' || token.value[0] == 'A' || token.value[0] == 'R' || token.value[0] == 'E')){
result = false;
}
} else {
result = false;
}
return result;
}
bool token_is_instruction(token_t token) {
bool result = true;
if(!token_is_numeric(token)) {
result = false;
}
return result;
}
void pass_one(program_t* program) {
parse_state_t parse_state = DEFINITION;
int token_index = 0;
int total_instr = 0;
int total_symbol = 0;
int module_base = 0;
int module_index = 0;
module_t modules[MACHINE_MEM];
while(token_index < (*program).tokencount){
switch (parse_state) {
case DEFINITION:
// first token should be defcount
if(token_is_numeric((*program).tokens[token_index])){
int defcount = atoi((*program).tokens[token_index].value);
int defcount_index = token_index;
if(defcount > DEFLIST_SIZE) {
__parseerror(4, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1);
deallocate(program);
exit(0);
}
// now we have to verify we have exactly defcount definition pairs
token_index++;
while(token_index <= defcount_index + defcount * 2) {
char* symbol_name;
int symbol_value;
int symbol_index;
if(!(token_index < (*program).tokencount)){
// if we run out of tokens before we are done then we are missing a symbol
__parseerror(1, (*program).last_line + 1, (*program).last_char + 1);
void deallocate(program_t* program);
exit(0);
} else if(token_is_symbol((*program).tokens[token_index]) == 0){
__parseerror(1, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1);
void deallocate(program_t* program);
exit(0);
} else if(token_is_symbol((*program).tokens[token_index]) == -2){
__parseerror(3, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1);
void deallocate(program_t* program);
exit(0);
}
symbol_name = (*program).tokens[token_index].value;
token_index++;
if(!(token_index < (*program).tokencount)){
// if we run out of tokens before we are done then we are missing a number
__parseerror(0, (*program).last_line + 1, (*program).last_char + 1);
void deallocate(program_t* program);
exit(0);
} else if(!token_is_numeric((*program).tokens[token_index])){
__parseerror(0, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1);
void deallocate(program_t* program);
exit(0);
}
symbol_value = atoi((*program).tokens[token_index].value) + module_base;
symbol_index = get_symbol_index_in_table(program, symbol_name);
if(symbol_index < 0) {
(*program).symbol_table[(*program).symbolcount].name = symbol_name;
(*program).symbol_table[(*program).symbolcount].value = symbol_value;
(*program).symbol_table[(*program).symbolcount].error = "";
(*program).symbol_table[(*program).symbolcount].used = 0;
(*program).symbol_table[(*program).symbolcount].module = module_index;
(*program).symbolcount++;
} else {
(*program).symbol_table[symbol_index].error = " Error: This variable is multiple times defined; first value used";
}
token_index++;
}
} else {
// we are missing the defcount
__parseerror(0, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1);
void deallocate(program_t* program);
exit(0);
}
parse_state = USE;
break;
case USE:
// first token should be usecount
if(token_is_numeric((*program).tokens[token_index])){
int usecount = atoi((*program).tokens[token_index].value);
int usecount_index = token_index;
if(usecount > DEFLIST_SIZE) {
__parseerror(5, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1);
void deallocate(program_t* program);
exit(0);
}
// prepare a module struct
modules[module_index].symbolcount = 0;
modules[module_index].symbols = (char**) malloc(sizeof(char) * SYMBOL_SIZE * usecount);
modules[module_index].usage = (int*) malloc(sizeof(int) * usecount);
// initiallize the usage counter
int i;
for(i=0;i<usecount;i++) {
modules[module_index].usage[i] = 0;
}
// now we have to verify we have exactly usecount symbols
token_index++;
while(token_index <= usecount_index + usecount) {
if(!(token_index < (*program).tokencount)){
// if we run out of tokens before we are done then we are missing a symbol
__parseerror(1, (*program).last_line + 1, (*program).last_char + 1);
void deallocate(program_t* program);
exit(0);
} else if (token_is_symbol((*program).tokens[token_index]) == 0){
__parseerror(1, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1);
void deallocate(program_t* program);
exit(0);
} else if(token_is_symbol((*program).tokens[token_index]) == -2){
__parseerror(3, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1);
void deallocate(program_t* program);
exit(0);
} else {
modules[module_index].symbols[modules[module_index].symbolcount] = (*program).tokens[token_index].value;
modules[module_index].symbolcount++;
}
token_index++;
}
} else {
// we are missing the usecount
__parseerror(0, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1);
void deallocate(program_t* program);
exit(0);
}
parse_state = PROGRAM;
break;
case PROGRAM:
// first token should be codecount
if(token_is_numeric((*program).tokens[token_index])){
int codecount = atoi((*program).tokens[token_index].value);
int codecount_index = token_index;
modules[module_index].size = codecount;
modules[module_index].base = module_base;
module_base += codecount;
total_instr += codecount;
if(total_instr > MACHINE_MEM) {
__parseerror(7, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1);
void deallocate(program_t* program);
exit(0);
}
// now we have to verify we have exactly codecount (type,intr) pairs
token_index++;
while(token_index <= codecount_index + codecount * 2) {
if(!(token_index < (*program).tokencount)){
// if we run out of tokens before we are done then we are missing an instruction type
__parseerror(2, (*program).last_line + 1, (*program).last_char + 1);
void deallocate(program_t* program);
exit(0);
} else if(!token_is_instruction_type((*program).tokens[token_index])){
__parseerror(2, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1);
void deallocate(program_t* program);
exit(0);
}
token_index++;
if(!(token_index < (*program).tokencount)){
// if we run out of tokens before we are done then we are missing an instruction
__parseerror(2, (*program).last_line + 1, (*program).last_char + 1);
void deallocate(program_t* program);
exit(0);
} else if(!token_is_instruction((*program).tokens[token_index])){
__parseerror(2, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1);
void deallocate(program_t* program);
exit(0);
}
token_index++;
}
} else {
//we are missing codecount
__parseerror(0, (*program).tokens[token_index].line + 1, (*program).tokens[token_index].start + 1);
void deallocate(program_t* program);
exit(0);
}
module_index++;
parse_state = DEFINITION;
break;
}
}
// populate the program struct with information about the modules
(*program).modulecount = module_index;
(*program).modules = (module_t*) malloc(sizeof(module_t) * (*program).modulecount);
int i;
for(i=0;i<(*program).modulecount;i++){
(*program).modules[i].symbols = modules[i].symbols;
(*program).modules[i].symbolcount = modules[i].symbolcount;
(*program).modules[i].base = modules[i].base;
(*program).modules[i].size = modules[i].size;
(*program).modules[i].usage = modules[i].usage;
}
for(i=0; i<(*program).symbolcount; i++){
module_t module = (*program).modules[(*program).symbol_table[i].module];
if((*program).symbol_table[i].value - module.base > module.size - 1) {
printf("Warning: Module %d: %s to big %d (max=%d) assume zero relative\n",
(*program).symbol_table[i].module + 1,
(*program).symbol_table[i].name,
(*program).symbol_table[i].value,
module.size - 1
);
(*program).symbol_table[i].value = 0;
}
}
//print the symbol table
printf("Symbol Table\n");
for(i=0; i<(*program).symbolcount; i++){
printf("%s=%d%s\n", (*program).symbol_table[i].name, (*program).symbol_table[i].value, (*program).symbol_table[i].error);
}
}
void pass_two(program_t* program) {
parse_state_t parse_state = DEFINITION;
int token_index = 0;
int module_index = 0;
int memory_address = 0;
int defcount, usecount, codecount;
printf("\nMemory Map\n");
while(token_index < (*program).tokencount){
switch (parse_state) {
case DEFINITION:
// on the second run we are not interested in the definitions, lets just skip them
defcount = atoi((*program).tokens[token_index].value);
token_index += defcount * 2 + 1;
parse_state = USE;
break;
case USE:
// we are not interested in the use list either...
usecount = atoi((*program).tokens[token_index].value);
token_index += usecount + 1;
parse_state = PROGRAM;
break;
case PROGRAM:
codecount = atoi((*program).tokens[token_index].value);
int codecount_index = token_index;
module_t module = (*program).modules[module_index];
token_index++;
while(token_index <= codecount_index + codecount * 2) {
char mem[MEM_ADDR];
char instr[INSTR_SIZE];
sprintf(mem, "%.03d", memory_address);
char instr_type = (*program).tokens[token_index].value[0];
token_index++;
int instrn = atoi((*program).tokens[token_index].value);
if ((*program).tokens[token_index].length <= 4) {
sprintf(instr, "%.04d", instrn);
}
if(instr_type == 'I'){
if (instrn > 9999) {
printf("%s: 9999 Error: Illegal immediate value; treated as 9999\n", mem);
} else {
printf("%s: %s\n", mem, instr);
}
} else if(instr_type == 'A'){
char opcode = instr[0];
int operand = atoi(instr+1);
char* error = "";
if (operand >= MACHINE_MEM) {
operand = 0;
error = " Error: Absolute address exceeds machine size; zero used";
}
char operands[INSTR_SIZE];
sprintf(operands, "%.04d", operand);
operands[0] = opcode;
printf("%s: %s%s\n", mem, operands, error);
} else if(instr_type == 'R'){
if ((*program).tokens[token_index].length > 4) {
printf("%s: 9999 Error: Illegal opcode; treated as 9999\n", mem);
} else {
char opcode = instr[0];
int operand = atoi(instr+1);
char* error = "";
if (operand > module.size) {
operand = 0;
error = " Error: Relative address exceeds module size; zero used\n";
}
operand += module.base;
char operands[INSTR_SIZE];
sprintf(operands, "%.04d", operand);
operands[0] = opcode;
printf("%s: %s%s\n", mem, operands, error);
}
} else if(instr_type == 'E'){
if ((*program).tokens[token_index].length > 4) {
printf("%s: 9999 Error: Illegal opcode; treated as 9999\n", mem);
} else {
char opcode = instr[0];
int operand = atoi(instr+1);
if (operand >= module.symbolcount){
printf("%s: %s Error: External address exceeds length of uselist; treated as immediate\n", mem, instr);
} else {
char* symbol = module.symbols[operand];
int symbol_index = get_symbol_index_in_table(program, symbol);
char error[ERROR_MESSAGE_SIZE];
int symbol_module_index = get_symbol_index_in_module(module, symbol);
module.usage[symbol_module_index]++;
if(symbol_index >= 0) {
(*program).symbol_table[symbol_index].used++;
memset(error, 0, sizeof error);
operand = (*program).symbol_table[symbol_index].value;
} else {
sprintf(error, " Error: %s is not defined; zero used", symbol);
operand = 0;
}
char operands[INSTR_SIZE];
sprintf(operands, "%.04d", operand);
operands[0] = opcode;
printf("%s: %s%s\n", mem, operands, error);
}
}
}
token_index++;
memory_address++;
}
module_index++;
parse_state = DEFINITION;
break;
}
}
int i, j;
for(i=0; i<(*program).symbolcount; i++){
if((*program).symbol_table[i].used <= 0) {
printf("Warning: %s was defined in module %d but never used\n", (*program).symbol_table[i].name, (*program).symbol_table[i].module +1);
}
}
for(i=0;i<(*program).modulecount;i++){
for(j=0;j<(*program).modules[i].symbolcount;j++){
if ((*program).modules[i].usage[j] <= 0){
printf("Warning: In Module %d %s appeared in the uselist but was not actually used\n", i+1, (*program).modules[i].symbols[j]);
}
}
}
}
void deallocate(program_t* program) {
int i;
for(i=0;i<(*program).modulecount;i++){
free((*program).modules[i].symbols);
free((*program).modules[i].usage);
}
free(program);
}
int main(int argc,char *argv[]) {
program_t* program;
program = read_tokens(argv[1]);
pass_one(program);
pass_two(program);
// free memory
deallocate(program);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment