MrSmith33 · November 3, 2014 13:56
diff --git a/tinyc.d b/tinyc.d
 /* file: "tinyc.d" */

 /* Copyright (C) 2001 by Marc Feeley, All Rights Reserved. */
 // D port by Mr.Smith (C) 2014

 import std.stdio;
 import std.algorithm : equal;

 /* 
 * This is a compiler for the Tiny-C language.  Tiny-C is a
 * considerably stripped down version of C and it is meant as a
 * pedagogical tool for learning about compilers.  The integer global
 * variables "a" to "z" are predefined and initialized to zero, and it
 * is not possible to declare new variables.  The compiler reads the
 * program from standard input and prints out the value of the
 * variables that are not zero.  The grammar of Tiny-C in EBNF is:
 *
 *  <program> ::= <statement>
 *  <statement> ::= "if" <paren_expr> <statement> |
 *                  "if" <paren_expr> <statement> "else" <statement> |
 *                  "while" <paren_expr> <statement> |
 *                  "do" <statement> "while" <paren_expr> ";" |
 *                  "{" { <statement> } "}" |
 *                  <expr> ";" |
 *                  ";"
 *  <paren_expr> ::= "(" <expr> ")"
 *  <expr> ::= <test> | <id> "=" <expr>
 *  <test> ::= <sum> | <sum> "<" <sum>
 *  <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term>
 *  <term> ::= <id> | <int> | <paren_expr>
 *  <id> ::= "a" | "b" | "c" | "d" | ... | "z"
 *  <int> ::= <an_unsigned_decimal_integer>
 *
 * Here are a few invocations of the compiler:
 *
 * % echo "a=b=c=2<3;" | ./a.out
 * a = 1
 * b = 1
 * c = 1
 * % echo "{ i=1; while (i<100) i=i+i; }" | ./a.out
 * i = 128
 * % echo "{ i=125; j=100; while (i-j) if (i<j) j=j-i; else i=i-j; }" | ./a.out
 * i = 25
 * j = 25
 * % echo "{ i=1; do i=i+10; while (i<50); }" | ./a.out
 * i = 51
 * % echo "{ i=1; while ((i=i+10)<50) ; }" | ./a.out
 * i = 51
 * % echo "{ i=7; if (i<5) n=1; if (i<10) y=2; }" | ./a.out
 * i = 7
 * y = 2
 *
 * The compiler does a minimal amount of error checking to help
 * highlight the structure of the compiler.
 */


 /*---------------------------------------------------------------------------*/

 /* Lexer. */

 enum { DO_SYM, ELSE_SYM, IF_SYM, WHILE_SYM, LBRA, RBRA, LPAR, RPAR,
 			 PLUS, MINUS, LESS, SEMI, EQUAL, INT, ID, EOI };
 string[] words = [ "do", "else", "if", "while", null ];

 char ch = ' ';
 int sym;
 int int_val;
 char[100] id_name;

 void syntax_error() { assert(false, "syntax error"); }

 void next_ch() { ch = cast(char)getchar(); }

 void next_sym()
 {
 	while(ch == ' ' || ch == '\n') next_ch();

 	switch (ch)
 	{
 		case 255: sym = EOI; break; //EOF
 		case '{': next_ch(); sym = LBRA; break;
 		case '}': next_ch(); sym = RBRA; break;
 		case '(': next_ch(); sym = LPAR; break;
 		case ')': next_ch(); sym = RPAR; break;
 		case '+': next_ch(); sym = PLUS; break;
 		case '-': next_ch(); sym = MINUS; break;
 		case '<': next_ch(); sym = LESS; break;
 		case ';': next_ch(); sym = SEMI; break;
 		case '=': next_ch(); sym = EQUAL; break;
 		default:
 		{
 			if (ch >= '0' && ch <= '9')
 			{
 				int_val = 0; /* missing overflow check */
 				while (ch >= '0' && ch <= '9')
 				{
 					int_val = int_val*10 + (ch - '0');
 					next_ch();
 				}
 				sym = INT;
 			}
 			else if (ch >= 'a' && ch <= 'z')
 			{
 				size_t i = 0; /* missing overflow check */
 				while ((ch >= 'a' && ch <= 'z') || ch == '_')
 				{
 					id_name[i++] = ch;
 					next_ch();
 				}
 				id_name[i] = '\0';
 				sym = 0;
 				while (words[sym] !is null && !equal(words[sym], id_name[0..i]))
 				{
 					sym = sym + 1;
 				}
 				if (words[sym] is null)
 				{
 					if (id_name[1] == '\0')
 						sym = ID;
 					else
 						syntax_error();
 				}
 			}
 			else
 				syntax_error();
 		}
 	}
 }

 /*---------------------------------------------------------------------------*/

 /* Parser. */

 enum { VAR, CST, ADD, SUB, LT, SET,
 			 IF1, IF2, WHILE, DO, EMPTY, SEQ, EXPR, PROG }

 struct Node
 {
 	int kind;
 	Node* o1, o2, o3;
 	int val;
 }

 Node* new_Node(int k)
 {
 	Node* n = new Node;
 	n.kind = k;
 	return n;
 }

 Node* term()  /* <term> ::= <id> | <int> | <paren_expr> */
 {
 	Node* n;
 	if (sym == ID) { n=new_Node(VAR); n.val=id_name[0]-'a'; next_sym(); }
 	else if (sym == INT) { n=new_Node(CST); n.val=int_val; next_sym(); }
 	else n = paren_expr();
 	return n;
 }

 Node* sum()  /* <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term> */
 {
 	Node* t, n = term();
 	while (sym == PLUS || sym == MINUS)
 	{
 		t=n; n=new_Node(sym==PLUS?ADD:SUB); next_sym(); n.o1=t; n.o2=term();
 	}
 	return n;
 }

 Node* test()  /* <test> ::= <sum> | <sum> "<" <sum> */
 {
 	Node* t, n = sum();
 	if (sym == LESS)
 	{
 		t=n; n=new_Node(LT); next_sym(); n.o1=t; n.o2=sum();
 	}
 	return n;
 }

 Node* expr()  /* <expr> ::= <test> | <id> "=" <expr> */
 {
 	Node* t, n;
 	if (sym != ID) return test();
 	n = test();
 	if (n.kind == VAR && sym == EQUAL)
 	{
 		t=n; n=new_Node(SET); next_sym(); n.o1=t; n.o2=expr();
 	}
 	return n;
 }

 Node* paren_expr()  /* <paren_expr> ::= "(" <expr> ")" */
 {
 	Node* n;
 	if (sym == LPAR) next_sym(); else syntax_error();
 	n = expr();
 	if (sym == RPAR) next_sym(); else syntax_error();
 	return n;
 }

 Node* statement()
 {
 	Node* n;

 	if (sym == IF_SYM)  /* "if" <paren_expr> <statement> */
 	{
 		n = new_Node(IF1);
 		next_sym();
 		n.o1 = paren_expr();
 		n.o2 = statement();
 		if (sym == ELSE_SYM)  /* ... "else" <statement> */
 		{
 			n.kind = IF2;
 			next_sym();
 			n.o3 = statement();
 		}
 	}
 	else if (sym == WHILE_SYM)  /* "while" <paren_expr> <statement> */
 	{
 		n = new_Node(WHILE);
 		next_sym();
 		n.o1 = paren_expr();
 		n.o2 = statement();
 	}
 	else if (sym == DO_SYM)  /* "do" <statement> "while" <paren_expr> ";" */
 	{
 		n = new_Node(DO);
 		next_sym();
 		n.o1 = statement();
 		if (sym == WHILE_SYM)
 			next_sym();
 		else
 			syntax_error();
 		n.o2 = paren_expr();
 		if (sym == SEMI)
 			next_sym();
 		else
 			syntax_error();
 	}
 	else if (sym == SEMI)  /* ";" */
 	{
 		n = new_Node(EMPTY);
 		next_sym();
 	}
 	else if (sym == LBRA)  /* "{" { <statement> } "}" */
 	{
 		n = new_Node(EMPTY);
 		next_sym();
 		while (sym != RBRA)
 		{
 			Node* temp = n;
 			n = new_Node(SEQ);
 			n.o1 = temp;
 			n.o2 = statement();
 		}
 		next_sym();
 	}
 	else  /* <expr> ";" */
 	{
 		n = new_Node(EXPR);
 		n.o1 = expr();
 		if (sym == SEMI)
 			next_sym();
 		else
 			syntax_error();
 	}
 	return n;
 }

 Node* program()  /* <program> ::= <statement> */
 {
 	Node* n = new_Node(PROG);
 	next_sym();
 	n.o1 = statement(); 
 	if (sym != EOI)
 		syntax_error();
 	return n;
 }

 /*---------------------------------------------------------------------------*/

 /* Code generator. */

 enum { IFETCH, ISTORE, IPUSH, IPOP, IADD, ISUB, ILT, JZ, JNZ, JMP, HALT };

 alias code = byte;
 code[1000] _object; // executable
 code* pc;

 static this()
 {
 	pc = &_object[0];
 }

 void put(code c) { *pc++ = c; } /* missing overflow check */
 code* hole() { return pc++; }
 void fix(code* src, code* dst) { *src = cast(code)(dst - src); } /* missing overflow check */

 void compile(Node* n)
 {
 	code* p1, p2;
 	switch (n.kind)
 	{
 		case VAR  : put(IFETCH); put(cast(code)n.val); break;
 		case CST  : put(IPUSH); put(cast(code)n.val); break;
 		case ADD  : compile(n.o1); compile(n.o2); put(IADD); break;
 		case SUB  : compile(n.o1); compile(n.o2); put(ISUB); break;
 		case LT   : compile(n.o1); compile(n.o2); put(ILT); break;
 		case SET  : compile(n.o2); put(ISTORE); put(cast(code)n.o1.val); break;
 		case IF1  : compile(n.o1); put(JZ); p1=hole(); compile(n.o2); fix(p1,pc); break;
 		case IF2  : compile(n.o1); put(JZ); p1=hole(); compile(n.o2); put(JMP); p2=hole();
 								fix(p1,pc); compile(n.o3); fix(p2,pc); break;
 		case WHILE: p1=pc; compile(n.o1); put(JZ); p2=hole(); compile(n.o2);
 								put(JMP); fix(hole(),p1); fix(p2,pc); break;
 		case DO   : p1=pc; compile(n.o1); compile(n.o2); put(JNZ); fix(hole(),p1); break;
 		case EMPTY: break;
 		case SEQ  : compile(n.o1); compile(n.o2); break;
 		case EXPR : compile(n.o1); put(IPOP); break;
 		case PROG : compile(n.o1); put(HALT); break;
 		default: break;
 	}
 }

 /*---------------------------------------------------------------------------*/

 /* Virtual machine. */
 struct VM
 {
 	int[26] globals;

 	void run()
 	{
 		int[1000] stack;
 		int* sp = stack.ptr;
 		code* pc = _object.ptr;

 		loop: while(true)
 		{
 			final switch (*pc++)
 			{
 				case IFETCH: *sp++ = globals[*pc++];               break;
 				case ISTORE: globals[*pc++] = sp[-1];              break;
 				case IPUSH : *sp++ = *pc++;                        break;
 				case IPOP  : --sp;                                 break;
 				case IADD  : sp[-2] = sp[-2] + sp[-1]; --sp;       break;
 				case ISUB  : sp[-2] = sp[-2] - sp[-1]; --sp;       break;
 				case ILT   : sp[-2] = sp[-2] < sp[-1]; --sp;       break;
 				case JMP   : pc += *pc;                            break;
 				case JZ    : if (*--sp == 0) pc += *pc; else pc++; break;
 				case JNZ   : if (*--sp != 0) pc += *pc; else pc++; break;
 				case HALT: break loop;
 			}
 		}
 	}
 }

 /*---------------------------------------------------------------------------*/

 /* Main program. */

 int main()
 {
 	compile(program());
 	VM vm;
 	vm.run();
 	foreach(i, v; vm.globals)
 		if (v != 0)
 			writefln("%s = %s", cast(char)('a'+i), v);

 	return 0;
 }
	/* file: "tinyc.d" */

	/* Copyright (C) 2001 by Marc Feeley, All Rights Reserved. */
	// D port by Mr.Smith (C) 2014

	import std.stdio;
	import std.algorithm : equal;

	/*
	* This is a compiler for the Tiny-C language. Tiny-C is a
	* considerably stripped down version of C and it is meant as a
	* pedagogical tool for learning about compilers. The integer global
	* variables "a" to "z" are predefined and initialized to zero, and it
	* is not possible to declare new variables. The compiler reads the
	* program from standard input and prints out the value of the
	* variables that are not zero. The grammar of Tiny-C in EBNF is:
	*
	* <program> ::= <statement>
	* <statement> ::= "if" <paren_expr> <statement> \|
	* "if" <paren_expr> <statement> "else" <statement> \|
	* "while" <paren_expr> <statement> \|
	* "do" <statement> "while" <paren_expr> ";" \|
	* "{" { <statement> } "}" \|
	* <expr> ";" \|
	* ";"
	* <paren_expr> ::= "(" <expr> ")"
	* <expr> ::= <test> \| <id> "=" <expr>
	* <test> ::= <sum> \| <sum> "<" <sum>
	* <sum> ::= <term> \| <sum> "+" <term> \| <sum> "-" <term>
	* <term> ::= <id> \| <int> \| <paren_expr>
	* <id> ::= "a" \| "b" \| "c" \| "d" \| ... \| "z"
	* <int> ::= <an_unsigned_decimal_integer>
	*
	* Here are a few invocations of the compiler:
	*
	* % echo "a=b=c=2<3;" \| ./a.out
	* a = 1
	* b = 1
	* c = 1
	* % echo "{ i=1; while (i<100) i=i+i; }" \| ./a.out
	* i = 128
	* % echo "{ i=125; j=100; while (i-j) if (i<j) j=j-i; else i=i-j; }" \| ./a.out
	* i = 25
	* j = 25
	* % echo "{ i=1; do i=i+10; while (i<50); }" \| ./a.out
	* i = 51
	* % echo "{ i=1; while ((i=i+10)<50) ; }" \| ./a.out
	* i = 51
	* % echo "{ i=7; if (i<5) n=1; if (i<10) y=2; }" \| ./a.out
	* i = 7
	* y = 2
	*
	* The compiler does a minimal amount of error checking to help
	* highlight the structure of the compiler.
	*/


	/---------------------------------------------------------------------------/

	/* Lexer. */

	enum { DO_SYM, ELSE_SYM, IF_SYM, WHILE_SYM, LBRA, RBRA, LPAR, RPAR,
	PLUS, MINUS, LESS, SEMI, EQUAL, INT, ID, EOI };
	string[] words = [ "do", "else", "if", "while", null ];

	char ch = ' ';
	int sym;
	int int_val;
	char[100] id_name;

	void syntax_error() { assert(false, "syntax error"); }

	void next_ch() { ch = cast(char)getchar(); }

	void next_sym()
	{
	while(ch == ' ' \|\| ch == '\n') next_ch();

	switch (ch)
	{
	case 255: sym = EOI; break; //EOF
	case '{': next_ch(); sym = LBRA; break;
	case '}': next_ch(); sym = RBRA; break;
	case '(': next_ch(); sym = LPAR; break;
	case ')': next_ch(); sym = RPAR; break;
	case '+': next_ch(); sym = PLUS; break;
	case '-': next_ch(); sym = MINUS; break;
	case '<': next_ch(); sym = LESS; break;
	case ';': next_ch(); sym = SEMI; break;
	case '=': next_ch(); sym = EQUAL; break;
	default:
	{
	if (ch >= '0' && ch <= '9')
	{
	int_val = 0; /* missing overflow check */
	while (ch >= '0' && ch <= '9')
	{
	int_val = int_val*10 + (ch - '0');
	next_ch();
	}
	sym = INT;
	}
	else if (ch >= 'a' && ch <= 'z')
	{
	size_t i = 0; /* missing overflow check */
	while ((ch >= 'a' && ch <= 'z') \|\| ch == '_')
	{
	id_name[i++] = ch;
	next_ch();
	}
	id_name[i] = '\0';
	sym = 0;
	while (words[sym] !is null && !equal(words[sym], id_name[0..i]))
	{
	sym = sym + 1;
	}
	if (words[sym] is null)
	{
	if (id_name[1] == '\0')
	sym = ID;
	else
	syntax_error();
	}
	}
	else
	syntax_error();
	}
	}
	}

	/---------------------------------------------------------------------------/

	/* Parser. */

	enum { VAR, CST, ADD, SUB, LT, SET,
	IF1, IF2, WHILE, DO, EMPTY, SEQ, EXPR, PROG }

	struct Node
	{
	int kind;
	Node* o1, o2, o3;
	int val;
	}

	Node* new_Node(int k)
	{
	Node* n = new Node;
	n.kind = k;
	return n;
	}

	Node* term() /* <term> ::= <id> \| <int> \| <paren_expr> */
	{
	Node* n;
	if (sym == ID) { n=new_Node(VAR); n.val=id_name[0]-'a'; next_sym(); }
	else if (sym == INT) { n=new_Node(CST); n.val=int_val; next_sym(); }
	else n = paren_expr();
	return n;
	}

	Node* sum() /* <sum> ::= <term> \| <sum> "+" <term> \| <sum> "-" <term> */
	{
	Node* t, n = term();
	while (sym == PLUS \|\| sym == MINUS)
	{
	t=n; n=new_Node(sym==PLUS?ADD:SUB); next_sym(); n.o1=t; n.o2=term();
	}
	return n;
	}

	Node* test() /* <test> ::= <sum> \| <sum> "<" <sum> */
	{
	Node* t, n = sum();
	if (sym == LESS)
	{
	t=n; n=new_Node(LT); next_sym(); n.o1=t; n.o2=sum();
	}
	return n;
	}

	Node* expr() /* <expr> ::= <test> \| <id> "=" <expr> */
	{
	Node* t, n;
	if (sym != ID) return test();
	n = test();
	if (n.kind == VAR && sym == EQUAL)
	{
	t=n; n=new_Node(SET); next_sym(); n.o1=t; n.o2=expr();
	}
	return n;
	}

	Node* paren_expr() /* <paren_expr> ::= "(" <expr> ")" */
	{
	Node* n;
	if (sym == LPAR) next_sym(); else syntax_error();
	n = expr();
	if (sym == RPAR) next_sym(); else syntax_error();
	return n;
	}

	Node* statement()
	{
	Node* n;

	if (sym == IF_SYM) /* "if" <paren_expr> <statement> */
	{
	n = new_Node(IF1);
	next_sym();
	n.o1 = paren_expr();
	n.o2 = statement();
	if (sym == ELSE_SYM) /* ... "else" <statement> */
	{
	n.kind = IF2;
	next_sym();
	n.o3 = statement();
	}
	}
	else if (sym == WHILE_SYM) /* "while" <paren_expr> <statement> */
	{
	n = new_Node(WHILE);
	next_sym();
	n.o1 = paren_expr();
	n.o2 = statement();
	}
	else if (sym == DO_SYM) /* "do" <statement> "while" <paren_expr> ";" */
	{
	n = new_Node(DO);
	next_sym();
	n.o1 = statement();
	if (sym == WHILE_SYM)
	next_sym();
	else
	syntax_error();
	n.o2 = paren_expr();
	if (sym == SEMI)
	next_sym();
	else
	syntax_error();
	}
	else if (sym == SEMI) /* ";" */
	{
	n = new_Node(EMPTY);
	next_sym();
	}
	else if (sym == LBRA) /* "{" { <statement> } "}" */
	{
	n = new_Node(EMPTY);
	next_sym();
	while (sym != RBRA)
	{
	Node* temp = n;
	n = new_Node(SEQ);
	n.o1 = temp;
	n.o2 = statement();
	}
	next_sym();
	}
	else /* <expr> ";" */
	{
	n = new_Node(EXPR);
	n.o1 = expr();
	if (sym == SEMI)
	next_sym();
	else
	syntax_error();
	}
	return n;
	}

	Node* program() /* <program> ::= <statement> */
	{
	Node* n = new_Node(PROG);
	next_sym();
	n.o1 = statement();
	if (sym != EOI)
	syntax_error();
	return n;
	}

	/---------------------------------------------------------------------------/

	/* Code generator. */

	enum { IFETCH, ISTORE, IPUSH, IPOP, IADD, ISUB, ILT, JZ, JNZ, JMP, HALT };

	alias code = byte;
	code[1000] _object; // executable
	code* pc;

	static this()
	{
	pc = &_object[0];
	}

	void put(code c) { pc++ = c; } / missing overflow check */
	code* hole() { return pc++; }
	void fix(code* src, code* dst) { src = cast(code)(dst - src); } / missing overflow check */

	void compile(Node* n)
	{
	code* p1, p2;
	switch (n.kind)
	{
	case VAR : put(IFETCH); put(cast(code)n.val); break;
	case CST : put(IPUSH); put(cast(code)n.val); break;
	case ADD : compile(n.o1); compile(n.o2); put(IADD); break;
	case SUB : compile(n.o1); compile(n.o2); put(ISUB); break;
	case LT : compile(n.o1); compile(n.o2); put(ILT); break;
	case SET : compile(n.o2); put(ISTORE); put(cast(code)n.o1.val); break;
	case IF1 : compile(n.o1); put(JZ); p1=hole(); compile(n.o2); fix(p1,pc); break;
	case IF2 : compile(n.o1); put(JZ); p1=hole(); compile(n.o2); put(JMP); p2=hole();
	fix(p1,pc); compile(n.o3); fix(p2,pc); break;
	case WHILE: p1=pc; compile(n.o1); put(JZ); p2=hole(); compile(n.o2);
	put(JMP); fix(hole(),p1); fix(p2,pc); break;
	case DO : p1=pc; compile(n.o1); compile(n.o2); put(JNZ); fix(hole(),p1); break;
	case EMPTY: break;
	case SEQ : compile(n.o1); compile(n.o2); break;
	case EXPR : compile(n.o1); put(IPOP); break;
	case PROG : compile(n.o1); put(HALT); break;
	default: break;
	}
	}

	/---------------------------------------------------------------------------/

	/* Virtual machine. */
	struct VM
	{
	int[26] globals;

	void run()
	{
	int[1000] stack;
	int* sp = stack.ptr;
	code* pc = _object.ptr;

	loop: while(true)
	{
	final switch (*pc++)
	{
	case IFETCH: sp++ = globals[pc++]; break;
	case ISTORE: globals[*pc++] = sp[-1]; break;
	case IPUSH : sp++ = pc++; break;
	case IPOP : --sp; break;
	case IADD : sp[-2] = sp[-2] + sp[-1]; --sp; break;
	case ISUB : sp[-2] = sp[-2] - sp[-1]; --sp; break;
	case ILT : sp[-2] = sp[-2] < sp[-1]; --sp; break;
	case JMP : pc += *pc; break;
	case JZ : if (--sp == 0) pc += pc; else pc++; break;
	case JNZ : if (--sp != 0) pc += pc; else pc++; break;
	case HALT: break loop;
	}
	}
	}
	}

	/---------------------------------------------------------------------------/

	/* Main program. */

	int main()
	{
	compile(program());
	VM vm;
	vm.run();
	foreach(i, v; vm.globals)
	if (v != 0)
	writefln("%s = %s", cast(char)('a'+i), v);

	return 0;
	}