Created
November 3, 2014 13:56
-
-
Save MrSmith33/7483b8decaa8bafed7a5 to your computer and use it in GitHub Desktop.
Tiny C
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* file: "tinyc.d" */ | |
/* Copyright (C) 2001 by Marc Feeley, All Rights Reserved. */ | |
// D port by Mr.Smith (C) 2014 | |
import std.stdio; | |
import std.algorithm : equal; | |
/* | |
* This is a compiler for the Tiny-C language. Tiny-C is a | |
* considerably stripped down version of C and it is meant as a | |
* pedagogical tool for learning about compilers. The integer global | |
* variables "a" to "z" are predefined and initialized to zero, and it | |
* is not possible to declare new variables. The compiler reads the | |
* program from standard input and prints out the value of the | |
* variables that are not zero. The grammar of Tiny-C in EBNF is: | |
* | |
* <program> ::= <statement> | |
* <statement> ::= "if" <paren_expr> <statement> | | |
* "if" <paren_expr> <statement> "else" <statement> | | |
* "while" <paren_expr> <statement> | | |
* "do" <statement> "while" <paren_expr> ";" | | |
* "{" { <statement> } "}" | | |
* <expr> ";" | | |
* ";" | |
* <paren_expr> ::= "(" <expr> ")" | |
* <expr> ::= <test> | <id> "=" <expr> | |
* <test> ::= <sum> | <sum> "<" <sum> | |
* <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term> | |
* <term> ::= <id> | <int> | <paren_expr> | |
* <id> ::= "a" | "b" | "c" | "d" | ... | "z" | |
* <int> ::= <an_unsigned_decimal_integer> | |
* | |
* Here are a few invocations of the compiler: | |
* | |
* % echo "a=b=c=2<3;" | ./a.out | |
* a = 1 | |
* b = 1 | |
* c = 1 | |
* % echo "{ i=1; while (i<100) i=i+i; }" | ./a.out | |
* i = 128 | |
* % echo "{ i=125; j=100; while (i-j) if (i<j) j=j-i; else i=i-j; }" | ./a.out | |
* i = 25 | |
* j = 25 | |
* % echo "{ i=1; do i=i+10; while (i<50); }" | ./a.out | |
* i = 51 | |
* % echo "{ i=1; while ((i=i+10)<50) ; }" | ./a.out | |
* i = 51 | |
* % echo "{ i=7; if (i<5) n=1; if (i<10) y=2; }" | ./a.out | |
* i = 7 | |
* y = 2 | |
* | |
* The compiler does a minimal amount of error checking to help | |
* highlight the structure of the compiler. | |
*/ | |
/*---------------------------------------------------------------------------*/ | |
/* Lexer. */ | |
enum { DO_SYM, ELSE_SYM, IF_SYM, WHILE_SYM, LBRA, RBRA, LPAR, RPAR, | |
PLUS, MINUS, LESS, SEMI, EQUAL, INT, ID, EOI }; | |
string[] words = [ "do", "else", "if", "while", null ]; | |
char ch = ' '; | |
int sym; | |
int int_val; | |
char[100] id_name; | |
void syntax_error() { assert(false, "syntax error"); } | |
void next_ch() { ch = cast(char)getchar(); } | |
void next_sym() | |
{ | |
while(ch == ' ' || ch == '\n') next_ch(); | |
switch (ch) | |
{ | |
case 255: sym = EOI; break; //EOF | |
case '{': next_ch(); sym = LBRA; break; | |
case '}': next_ch(); sym = RBRA; break; | |
case '(': next_ch(); sym = LPAR; break; | |
case ')': next_ch(); sym = RPAR; break; | |
case '+': next_ch(); sym = PLUS; break; | |
case '-': next_ch(); sym = MINUS; break; | |
case '<': next_ch(); sym = LESS; break; | |
case ';': next_ch(); sym = SEMI; break; | |
case '=': next_ch(); sym = EQUAL; break; | |
default: | |
{ | |
if (ch >= '0' && ch <= '9') | |
{ | |
int_val = 0; /* missing overflow check */ | |
while (ch >= '0' && ch <= '9') | |
{ | |
int_val = int_val*10 + (ch - '0'); | |
next_ch(); | |
} | |
sym = INT; | |
} | |
else if (ch >= 'a' && ch <= 'z') | |
{ | |
size_t i = 0; /* missing overflow check */ | |
while ((ch >= 'a' && ch <= 'z') || ch == '_') | |
{ | |
id_name[i++] = ch; | |
next_ch(); | |
} | |
id_name[i] = '\0'; | |
sym = 0; | |
while (words[sym] !is null && !equal(words[sym], id_name[0..i])) | |
{ | |
sym = sym + 1; | |
} | |
if (words[sym] is null) | |
{ | |
if (id_name[1] == '\0') | |
sym = ID; | |
else | |
syntax_error(); | |
} | |
} | |
else | |
syntax_error(); | |
} | |
} | |
} | |
/*---------------------------------------------------------------------------*/ | |
/* Parser. */ | |
enum { VAR, CST, ADD, SUB, LT, SET, | |
IF1, IF2, WHILE, DO, EMPTY, SEQ, EXPR, PROG } | |
struct Node | |
{ | |
int kind; | |
Node* o1, o2, o3; | |
int val; | |
} | |
Node* new_Node(int k) | |
{ | |
Node* n = new Node; | |
n.kind = k; | |
return n; | |
} | |
Node* term() /* <term> ::= <id> | <int> | <paren_expr> */ | |
{ | |
Node* n; | |
if (sym == ID) { n=new_Node(VAR); n.val=id_name[0]-'a'; next_sym(); } | |
else if (sym == INT) { n=new_Node(CST); n.val=int_val; next_sym(); } | |
else n = paren_expr(); | |
return n; | |
} | |
Node* sum() /* <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term> */ | |
{ | |
Node* t, n = term(); | |
while (sym == PLUS || sym == MINUS) | |
{ | |
t=n; n=new_Node(sym==PLUS?ADD:SUB); next_sym(); n.o1=t; n.o2=term(); | |
} | |
return n; | |
} | |
Node* test() /* <test> ::= <sum> | <sum> "<" <sum> */ | |
{ | |
Node* t, n = sum(); | |
if (sym == LESS) | |
{ | |
t=n; n=new_Node(LT); next_sym(); n.o1=t; n.o2=sum(); | |
} | |
return n; | |
} | |
Node* expr() /* <expr> ::= <test> | <id> "=" <expr> */ | |
{ | |
Node* t, n; | |
if (sym != ID) return test(); | |
n = test(); | |
if (n.kind == VAR && sym == EQUAL) | |
{ | |
t=n; n=new_Node(SET); next_sym(); n.o1=t; n.o2=expr(); | |
} | |
return n; | |
} | |
Node* paren_expr() /* <paren_expr> ::= "(" <expr> ")" */ | |
{ | |
Node* n; | |
if (sym == LPAR) next_sym(); else syntax_error(); | |
n = expr(); | |
if (sym == RPAR) next_sym(); else syntax_error(); | |
return n; | |
} | |
Node* statement() | |
{ | |
Node* n; | |
if (sym == IF_SYM) /* "if" <paren_expr> <statement> */ | |
{ | |
n = new_Node(IF1); | |
next_sym(); | |
n.o1 = paren_expr(); | |
n.o2 = statement(); | |
if (sym == ELSE_SYM) /* ... "else" <statement> */ | |
{ | |
n.kind = IF2; | |
next_sym(); | |
n.o3 = statement(); | |
} | |
} | |
else if (sym == WHILE_SYM) /* "while" <paren_expr> <statement> */ | |
{ | |
n = new_Node(WHILE); | |
next_sym(); | |
n.o1 = paren_expr(); | |
n.o2 = statement(); | |
} | |
else if (sym == DO_SYM) /* "do" <statement> "while" <paren_expr> ";" */ | |
{ | |
n = new_Node(DO); | |
next_sym(); | |
n.o1 = statement(); | |
if (sym == WHILE_SYM) | |
next_sym(); | |
else | |
syntax_error(); | |
n.o2 = paren_expr(); | |
if (sym == SEMI) | |
next_sym(); | |
else | |
syntax_error(); | |
} | |
else if (sym == SEMI) /* ";" */ | |
{ | |
n = new_Node(EMPTY); | |
next_sym(); | |
} | |
else if (sym == LBRA) /* "{" { <statement> } "}" */ | |
{ | |
n = new_Node(EMPTY); | |
next_sym(); | |
while (sym != RBRA) | |
{ | |
Node* temp = n; | |
n = new_Node(SEQ); | |
n.o1 = temp; | |
n.o2 = statement(); | |
} | |
next_sym(); | |
} | |
else /* <expr> ";" */ | |
{ | |
n = new_Node(EXPR); | |
n.o1 = expr(); | |
if (sym == SEMI) | |
next_sym(); | |
else | |
syntax_error(); | |
} | |
return n; | |
} | |
Node* program() /* <program> ::= <statement> */ | |
{ | |
Node* n = new_Node(PROG); | |
next_sym(); | |
n.o1 = statement(); | |
if (sym != EOI) | |
syntax_error(); | |
return n; | |
} | |
/*---------------------------------------------------------------------------*/ | |
/* Code generator. */ | |
enum { IFETCH, ISTORE, IPUSH, IPOP, IADD, ISUB, ILT, JZ, JNZ, JMP, HALT }; | |
alias code = byte; | |
code[1000] _object; // executable | |
code* pc; | |
static this() | |
{ | |
pc = &_object[0]; | |
} | |
void put(code c) { *pc++ = c; } /* missing overflow check */ | |
code* hole() { return pc++; } | |
void fix(code* src, code* dst) { *src = cast(code)(dst - src); } /* missing overflow check */ | |
void compile(Node* n) | |
{ | |
code* p1, p2; | |
switch (n.kind) | |
{ | |
case VAR : put(IFETCH); put(cast(code)n.val); break; | |
case CST : put(IPUSH); put(cast(code)n.val); break; | |
case ADD : compile(n.o1); compile(n.o2); put(IADD); break; | |
case SUB : compile(n.o1); compile(n.o2); put(ISUB); break; | |
case LT : compile(n.o1); compile(n.o2); put(ILT); break; | |
case SET : compile(n.o2); put(ISTORE); put(cast(code)n.o1.val); break; | |
case IF1 : compile(n.o1); put(JZ); p1=hole(); compile(n.o2); fix(p1,pc); break; | |
case IF2 : compile(n.o1); put(JZ); p1=hole(); compile(n.o2); put(JMP); p2=hole(); | |
fix(p1,pc); compile(n.o3); fix(p2,pc); break; | |
case WHILE: p1=pc; compile(n.o1); put(JZ); p2=hole(); compile(n.o2); | |
put(JMP); fix(hole(),p1); fix(p2,pc); break; | |
case DO : p1=pc; compile(n.o1); compile(n.o2); put(JNZ); fix(hole(),p1); break; | |
case EMPTY: break; | |
case SEQ : compile(n.o1); compile(n.o2); break; | |
case EXPR : compile(n.o1); put(IPOP); break; | |
case PROG : compile(n.o1); put(HALT); break; | |
default: break; | |
} | |
} | |
/*---------------------------------------------------------------------------*/ | |
/* Virtual machine. */ | |
struct VM | |
{ | |
int[26] globals; | |
void run() | |
{ | |
int[1000] stack; | |
int* sp = stack.ptr; | |
code* pc = _object.ptr; | |
loop: while(true) | |
{ | |
final switch (*pc++) | |
{ | |
case IFETCH: *sp++ = globals[*pc++]; break; | |
case ISTORE: globals[*pc++] = sp[-1]; break; | |
case IPUSH : *sp++ = *pc++; break; | |
case IPOP : --sp; break; | |
case IADD : sp[-2] = sp[-2] + sp[-1]; --sp; break; | |
case ISUB : sp[-2] = sp[-2] - sp[-1]; --sp; break; | |
case ILT : sp[-2] = sp[-2] < sp[-1]; --sp; break; | |
case JMP : pc += *pc; break; | |
case JZ : if (*--sp == 0) pc += *pc; else pc++; break; | |
case JNZ : if (*--sp != 0) pc += *pc; else pc++; break; | |
case HALT: break loop; | |
} | |
} | |
} | |
} | |
/*---------------------------------------------------------------------------*/ | |
/* Main program. */ | |
int main() | |
{ | |
compile(program()); | |
VM vm; | |
vm.run(); | |
foreach(i, v; vm.globals) | |
if (v != 0) | |
writefln("%s = %s", cast(char)('a'+i), v); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment