Created
June 8, 2011 18:25
-
-
Save lewurm/1015002 to your computer and use it in GitHub Desktop.
C implementation of assignment 13.12.2010. compiler + VM.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
13122010 | |
*.o |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdint.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <ctype.h> | |
/* | |
== ANGABE == | |
Schriftliche Angabe: Entwirf einen Interpreter und einen Compiler in Pseudocode | |
für folgende Sprache: (Zeit: 1,5h) | |
Func => FUNC Ident ( Idents ) Expr END | |
Expr => | |
| Expr + Expr // Addition | |
| Expr = Expr // Vergleich | |
| Ident // Argumentabfrage | |
| Ident ( Exprs) // Funktionsaufruf | |
| IF Expr THEN Expr [ELSE Expr] END // Verzweigung (Else ist optional, 0 = false, alles andere true) | |
Ein Programm besteht aus einer beliebigen Menge von Funktionen (zumindest eine). | |
Die erste Funktion darf keine Parameter haben und dient als Startpunkt. Es sind | |
nur Ganzzahlen erlaubt, eine solche ist auch das Ergebnis der Berechnung. Idents | |
und Exprs geben eine Auflistung von Identifiern und Expressions an, sie waren | |
nicht näher definiert, wie auch Ident. | |
*/ | |
/* | |
Ueberlegungen: | |
- programcounter (zeigt auf instructionmem) | |
- stack- und basepointer (zeigt auf datenmem) | |
instruction set: | |
instructionformat: 5bit opcode (31-27), 11bit argcount (26-16), 16bit signed immediate (15-0) | |
- pushimm: op = 1, imm = imm. "push immediate" | |
- add: op = 2, imm = undef. push(pop() + pop()). "addition" | |
- equ: op = 3, imm = undef. push(pop() == pop()). "equal" | |
- param: op = 4, imm = basepointer offset. push(dmem[bp+imm]). "get parameter" | |
- br: op = 5, imm = ip offset. ip += imm. "branch relative" | |
- bt: op = 6, imm = ip offset. if(pop()) { ip += imm }. "branch if true" | |
- bf: op = 7, imm = ip offset. if(!pop()) { ip += imm }. "branch if false" | |
- call: op = 8, imm = ip to function. ip = imm. "call to function" | |
- ret: op = 0, imm = undef. ip = dmem[bp]. "return from function" | |
*/ | |
#if 0 | |
#define DEBUG | |
#endif | |
#define NOP 0 | |
#define PUSHIMM 1 | |
#define ADD 2 | |
#define EQU 3 | |
#define PARAM 4 | |
#define BR 5 | |
#define BT 6 | |
#define BF 7 | |
#define CALL 8 | |
#define RET 9 | |
#define OP(opcode) (opcode << 27) | |
#define GETOP(instr) (instr >> 27 & 0x1f) | |
#define ARG(argcount) ((argcount << 16)) | |
#define GETARG(instr) ((instr >> 16) & 0x7ff) | |
#define IMM(num) (num & 0xffff) | |
#define GETIMM(instr) ((int16_t) (instr & 0xffff)) | |
struct name_off { | |
char name[10]; | |
int off; | |
}; | |
void parse(char *); | |
char *pfunc(char *); | |
char *pexpr(char *); | |
int isBlank(char *); | |
void vm(void); | |
/* global vars, oh noez */ | |
uint32_t imem[1000] = {0}; | |
uint32_t dmem[1000] = {0}; | |
uint32_t ip = 0, sp = 0, bp = 0; | |
uint32_t fcount = 0, acount = 0, pcount = 0; | |
struct name_off fmapping[10]; | |
struct name_off amapping[10]; | |
struct name_off topatch[10]; | |
void printmap(struct name_off *map, int count) | |
{ | |
#ifdef DEBUG | |
int i = 0; | |
for (; i < count; i++) { | |
printf("name: \"%s\", off: \"%d\"\n", (map + i)->name, (map + i)->off); | |
} | |
#endif | |
} | |
int getOff(struct name_off *map, int count, char *str) | |
{ | |
int i = 0; | |
for (; i < count; i++) { | |
if (strncmp(str, (map + i)->name, 10) == 0) { | |
return (map + i)->off; | |
} | |
} | |
return -1; | |
} | |
char *op2name(int op) | |
{ | |
static char *name[] = {"NOP", "PUSHIMM", "ADD", "EQU", "PARAM", "BR", "BT", "BF", "CALL", "RET"}; | |
return name[op]; | |
} | |
int main(void) | |
{ | |
#if 1 | |
/* fib von 5 */ | |
char *prog = "FUNC main () fib (11) END\n" | |
"FUNC fib (arg)\n" | |
"\tIF arg = 0 THEN 0\n" | |
"\t\tELSE IF arg = 1 THEN 1\n" | |
"\t\tELSE fib (arg + -1) + fib (arg + -2) END END\n" | |
"END"; | |
#else | |
char *prog = "FUNC main () asdf (1 2 3 4) END\n" | |
"FUNC asdf (aaa bbb ccc ddd) bbb END\n" | |
; | |
#endif | |
/* init stuff */ | |
memset(fmapping, 0, sizeof(fmapping)); | |
memset(topatch, 0, sizeof(topatch)); | |
printf("input: \n%s\n", prog); | |
parse(prog); | |
#ifdef DEBUG | |
printf("after parse, imem dump:\n"); | |
for (ip = 0; ip < 30; ip++) { | |
printf("imem[%2i]: %s, %d, %d\n", ip, op2name(GETOP(imem[ip])), GETARG(imem[ip]), GETIMM(imem[ip])); | |
} | |
#endif | |
/* patch unresolved function calls */ | |
for (ip = 0; ip < pcount; ip++) { | |
int ret = getOff(fmapping, fcount, topatch[ip].name); | |
if (ret == -1) { | |
fprintf(stderr, "func not in map: \"%s\"\n", topatch[ip].name); exit(1); | |
} else { | |
int rip = topatch[ip].off; | |
imem[rip] = OP(GETOP(imem[rip])) | IMM(ret); | |
} | |
} | |
#ifdef DEBUG | |
printf("\nafter patch, imem dump:\n"); | |
for (ip = 0; ip < 30; ip++) { | |
printf("imem[%2i]: %s, %d, %d\n", ip, op2name(GETOP(imem[ip])), GETARG(imem[ip]), GETIMM(imem[ip])); | |
} | |
#endif | |
ip = 0; | |
vm(); | |
printf("\nsp = %d\n", sp); | |
printf("dmem[sp] = %d\n\n", dmem[sp]); | |
#ifdef DEBUG | |
for (ip = 0; ip < 10; ip++) { | |
printf("dmem[%2i]: %d\n", ip, dmem[ip]); | |
} | |
#endif | |
exit(0); | |
} | |
void parse(char *input) | |
{ | |
while(*input) { | |
if (strncmp("FUNC", input, 4) == 0) { | |
input = pfunc(input + 4); | |
imem[ip] = OP(RET) | ARG(acount); | |
ip++; | |
} else { | |
fprintf(stderr, "parse: %s\n", input); exit(1); | |
} | |
} | |
} | |
char *pfunc(char *input) | |
{ | |
int len = 0; | |
acount = 0; | |
memset(amapping, 0, sizeof(amapping)); | |
while (isBlank(input)) { input++; } | |
while (!isBlank(input)) { | |
input++; len++; | |
} | |
strncpy(fmapping[fcount].name, input - len, len > 10 ? 10 : len); | |
fmapping[fcount].off = ip; | |
fcount++; | |
while (isBlank(input)) { input++; } | |
if (*input != '(') { | |
fprintf(stderr, "missing \'(\'\n: %s\n", input); exit(1); | |
} | |
input++; | |
while (isBlank(input)) { input++; } | |
while (*input != ')') { | |
len = 0; | |
while (!isBlank(input) && *input != ')') { | |
input++; len++; | |
} | |
strncpy(amapping[acount].name, input - len, len > 10 ? 10 : len); | |
amapping[acount].off = acount; | |
acount++; | |
while (isBlank(input)) { input++; } | |
} | |
printmap(fmapping, fcount); | |
printmap(amapping, acount); | |
input++; | |
while (isBlank(input)) { input++; } | |
while (strncmp("END", input, 3) != 0) { | |
input = pexpr(input); | |
} | |
input += 3; | |
while (isBlank(input)) { input++; } | |
return input; | |
} | |
char *pexpr(char *input) | |
{ | |
if (isdigit(*input) || *input == '-') { | |
int t; | |
int n = 1; | |
if (*input == '-') { | |
n = -1; input++; | |
} | |
#ifdef DEBUG | |
printf("digit\n"); | |
#endif | |
sscanf(input, "%d", &t); | |
imem[ip] = OP(PUSHIMM) | IMM(t * n); | |
ip++; | |
while (isdigit(*input)) { input++; } | |
while (isBlank(input)) { input++; } | |
} else if (*input == '+') { | |
#ifdef DEBUG | |
printf("add\n"); | |
#endif | |
input++; | |
while (isBlank(input)) { input++; } | |
input = pexpr(input); | |
imem[ip] = OP(ADD); | |
ip++; | |
} else if (*input == '=') { | |
#ifdef DEBUG | |
printf("equ\n"); | |
#endif | |
input++; | |
while (isBlank(input)) { input++; } | |
input = pexpr(input); | |
imem[ip] = OP(EQU); | |
ip++; | |
} else if (strncmp("IF", input, 2) == 0) { | |
int ipthen, ipelse = -1; | |
input += 2; | |
#ifdef DEBUG | |
printf("ifthen\n"); | |
#endif | |
while (isBlank(input)) { input++; } | |
while (strncmp("THEN", input, 4) != 0) { | |
input = pexpr(input); | |
while (isBlank(input)) { input++; } | |
} | |
input += 4; | |
while (isBlank(input)) { input++; } | |
imem[ip] = OP(NOP); // platzhalter fuer bf | |
ipthen = ip; | |
ip++; | |
while ((strncmp("ELSE", input, 4) != 0) && (strncmp("END", input, 3) != 0)) { | |
input = pexpr(input); | |
while (isBlank(input)) { input++; } | |
} | |
if (strncmp("ELSE", input, 4) == 0) { | |
imem[ip] = OP(NOP); // platzhalter fuer br | |
ipelse = ip; | |
ip++; | |
imem[ipthen] = OP(BF) | IMM((ip - ipthen)); | |
input += 4; | |
while (isBlank(input)) { input++; } | |
while (strncmp("END", input, 3) != 0) { | |
input = pexpr(input); | |
while (isBlank(input)) { input++; } | |
} | |
} | |
if (ipelse != -1) { | |
imem[ipelse] = OP(BR) | IMM((ip - ipelse)); | |
} else { | |
imem[ipthen] = OP(BF) | IMM((ip - ipthen)); | |
} | |
if (strncmp("END", input, 3) == 0) { | |
input += 3; | |
} else { | |
fprintf(stderr, "missing \"END\": \"%s\"\n", input); exit(1); | |
} | |
while (isBlank(input)) { input++; } | |
} else { | |
#ifdef DEBUG | |
printf("param/call\n"); | |
#endif | |
int len = 0; | |
char ident[10] = {0}; | |
while (!isBlank(input)) { | |
input++; len++; | |
} | |
strncpy(ident, input - len, len > 10 ? 10 : len); | |
while (isBlank(input)) { input++; len++; } | |
if (*input == '(') { // call | |
#ifdef DEBUG | |
printf("call\n"); | |
#endif | |
int ins = OP(CALL) | ARG(acount) | IMM(getOff(fmapping, fcount, ident)); | |
while (isBlank(input)) { input++; } | |
input++; | |
while (*input != ')') { | |
while (isBlank(input)) { input++; } | |
input = pexpr(input); | |
} | |
input++; | |
imem[ip] = ins; | |
if (GETIMM(ins) == -1) { | |
strncpy(topatch[pcount].name, ident, strlen(ident) > 10 ? 10 : strlen(ident)); | |
topatch[pcount].off = ip; | |
pcount++; | |
} | |
ip++; | |
} else { | |
#ifdef DEBUG | |
printf("param\n"); | |
#endif | |
int erg = getOff(amapping, acount, ident); | |
if (GETIMM(erg) == -1) { | |
fprintf(stderr, "param (\"%s\") not found: \"%s\"\n=====\n", ident, input - len); exit(1); | |
printmap(amapping, acount); | |
} | |
imem[ip] = OP(PARAM) | ARG(acount) | IMM(erg); | |
ip++; | |
} | |
while (isBlank(input)) { input++; } | |
} | |
return input; | |
} | |
int isBlank(char *input) | |
{ | |
return *input == ' ' || *input == '\t' || *input == '\n'; | |
} | |
void vm() | |
{ | |
dmem[0] = 0x1337; | |
bp = 0; sp = 1; | |
while(1) { | |
uint32_t ins = imem[ip]; | |
#ifdef DEBUG | |
printf("\n@op[%2i]: %s, %d, %d\n", ip, op2name(GETOP(ins)), GETARG(ins), GETIMM(ins)); | |
printf("bp = %d, sp = %d, dmem[sp] = %d\n", bp, sp, dmem[sp]); | |
#endif | |
switch(GETOP(ins)) { | |
case NOP: | |
ip++; | |
break; | |
case PUSHIMM: | |
dmem[++sp] = GETIMM(ins); | |
ip++; | |
break; | |
case ADD: | |
dmem[sp - 1] += dmem[sp]; | |
sp--; | |
ip++; | |
break; | |
case EQU: | |
dmem[sp - 1] = dmem[sp - 1] == dmem[sp]; | |
sp--; | |
ip++; | |
break; | |
case PARAM: | |
sp++; | |
dmem[sp] = dmem[bp - (GETARG(ins) - GETIMM(ins))]; | |
ip++; | |
break; | |
case BR: | |
ip += GETIMM(ins); | |
break; | |
case BT: | |
if (dmem[sp]) { | |
ip += GETIMM(ins); | |
} else { | |
ip++; | |
} | |
sp--; | |
break; | |
case BF: | |
if (!dmem[sp]) { | |
ip += GETIMM(ins); | |
} else { | |
ip++; | |
} | |
sp--; | |
break; | |
case CALL: | |
{ | |
int oldip = ip + 1; | |
int oldbp = bp; | |
ip = GETIMM(ins); | |
bp = sp + 1; | |
sp = sp + 2; | |
/* save bp and sp */ | |
dmem[bp] = oldip; | |
dmem[bp + 1] = oldbp; | |
} | |
break; | |
case RET: | |
if (dmem[bp] == 0x1337) { | |
return; | |
} | |
{ | |
int retval = dmem[sp]; | |
ip = dmem[bp]; | |
sp = bp - GETARG(ins); | |
bp = dmem[bp + 1]; | |
/* push return value to caller stack */ | |
dmem[sp] = retval; | |
} | |
break; | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CFLAGS := -g -O2 -std=c99 -Wall -Werror | |
TARGET := 13122010 | |
all: $(TARGET) | |
clean: | |
rm -Rf *.o $(TARGET) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment