seanjensengrey · October 22, 2025 01:26 · felix91gr · Oct 17, 2018 · gusteivos · Aug 24, 2024
diff --git a/tiny.c b/tiny.c
 /* file: "tinyc.c" */
 /* originally from http://www.iro.umontreal.ca/~felipe/IFT2030-Automne2002/Complements/tinyc.c */ 

 /* Copyright (C) 2001 by Marc Feeley, All Rights Reserved. */

 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

 /* 
 * This is a compiler for the Tiny-C language.  Tiny-C is a
 * considerably stripped down version of C and it is meant as a
 * pedagogical tool for learning about compilers.  The integer global
 * variables "a" to "z" are predefined and initialized to zero, and it
 * is not possible to declare new variables.  The compiler reads the
 * program from standard input and prints out the value of the
 * variables that are not zero.  The grammar of Tiny-C in EBNF is:
 *
 *  <program> ::= <statement>
 *  <statement> ::= "if" <paren_expr> <statement> |
 *                  "if" <paren_expr> <statement> "else" <statement> |
 *                  "while" <paren_expr> <statement> |
 *                  "do" <statement> "while" <paren_expr> ";" |
 *                  "{" { <statement> } "}" |
 *                  <expr> ";" |
 *                  ";"
 *  <paren_expr> ::= "(" <expr> ")"
 *  <expr> ::= <test> | <id> "=" <expr>
 *  <test> ::= <sum> | <sum> "<" <sum>
 *  <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term>
 *  <term> ::= <id> | <int> | <paren_expr>
 *  <id> ::= "a" | "b" | "c" | "d" | ... | "z"
 *  <int> ::= <an_unsigned_decimal_integer>
 *
 * Here are a few invocations of the compiler:
 *
 * % echo "a=b=c=2<3;" | ./a.out
 * a = 1
 * b = 1
 * c = 1
 * % echo "{ i=1; while (i<100) i=i+i; }" | ./a.out
 * i = 128
 * % echo "{ i=125; j=100; while (i-j) if (i<j) j=j-i; else i=i-j; }" | ./a.out
 * i = 25
 * j = 25
 * % echo "{ i=1; do i=i+10; while (i<50); }" | ./a.out
 * i = 51
 * % echo "{ i=1; while ((i=i+10)<50) ; }" | ./a.out
 * i = 51
 * % echo "{ i=7; if (i<5) x=1; if (i<10) y=2; }" | ./a.out
 * i = 7
 * y = 2
 *
 * The compiler does a minimal amount of error checking to help
 * highlight the structure of the compiler.
 */


 /*---------------------------------------------------------------------------*/

 /* Lexer. */

 enum { DO_SYM, ELSE_SYM, IF_SYM, WHILE_SYM, LBRA, RBRA, LPAR, RPAR,
       PLUS, MINUS, LESS, SEMI, EQUAL, INT, ID, EOI };

 char *words[] = { "do", "else", "if", "while", NULL };

 int ch = ' ';
 int sym;
 int int_val;
 char id_name[100];

 void syntax_error() { fprintf(stderr, "syntax error\n"); exit(1); }

 void next_ch() { ch = getchar(); }

 void next_sym()
 { again: switch (ch)
    { case ' ': case '\n': next_ch(); goto again;
      case EOF: sym = EOI; break;
      case '{': next_ch(); sym = LBRA; break;
      case '}': next_ch(); sym = RBRA; break;
      case '(': next_ch(); sym = LPAR; break;
      case ')': next_ch(); sym = RPAR; break;
      case '+': next_ch(); sym = PLUS; break;
      case '-': next_ch(); sym = MINUS; break;
      case '<': next_ch(); sym = LESS; break;
      case ';': next_ch(); sym = SEMI; break;
      case '=': next_ch(); sym = EQUAL; break;
      default:
        if (ch >= '0' && ch <= '9')
          { int_val = 0; /* missing overflow check */
            while (ch >= '0' && ch <= '9')
              { int_val = int_val*10 + (ch - '0'); next_ch(); }
            sym = INT;
          }
        else if (ch >= 'a' && ch <= 'z')
          { int i = 0; /* missing overflow check */
            while ((ch >= 'a' && ch <= 'z') || ch == '_')
              { id_name[i++] = ch; next_ch(); }
            id_name[i] = '\0';
            sym = 0;
            while (words[sym] != NULL && strcmp(words[sym], id_name) != 0)
              sym++;
            if (words[sym] == NULL)
              if (id_name[1] == '\0') sym = ID; else syntax_error();
          }
        else
          syntax_error();
    }
 }

 /*---------------------------------------------------------------------------*/

 /* Parser. */

 enum { VAR, CST, ADD, SUB, LT, SET,
       IF1, IF2, WHILE, DO, EMPTY, SEQ, EXPR, PROG };

 struct node { int kind; struct node *o1, *o2, *o3; int val; };
 typedef struct node node;

 node *new_node(int k)
 { node *x = (node*)malloc(sizeof(node)); x->kind = k; return x; }

 node *paren_expr(); /* forward declaration */

 node *term()  /* <term> ::= <id> | <int> | <paren_expr> */
 { node *x;
  if (sym == ID) { x=new_node(VAR); x->val=id_name[0]-'a'; next_sym(); }
  else if (sym == INT) { x=new_node(CST); x->val=int_val; next_sym(); }
  else x = paren_expr();
  return x;
 }

 node *sum()  /* <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term> */
 { node *t, *x = term();
  while (sym == PLUS || sym == MINUS)
    { t=x; x=new_node(sym==PLUS?ADD:SUB); next_sym(); x->o1=t; x->o2=term(); }
  return x;
 }

 node *test()  /* <test> ::= <sum> | <sum> "<" <sum> */
 { node *t, *x = sum();
  if (sym == LESS)
    { t=x; x=new_node(LT); next_sym(); x->o1=t; x->o2=sum(); }
  return x;
 }

 node *expr()  /* <expr> ::= <test> | <id> "=" <expr> */
 { node *t, *x;
  if (sym != ID) return test();
  x = test();
  if (x->kind == VAR && sym == EQUAL)
    { t=x; x=new_node(SET); next_sym(); x->o1=t; x->o2=expr(); }
  return x;
 }

 node *paren_expr()  /* <paren_expr> ::= "(" <expr> ")" */
 { node *x;
  if (sym == LPAR) next_sym(); else syntax_error();
  x = expr();
  if (sym == RPAR) next_sym(); else syntax_error();
  return x;
 }

 node *statement()
 { node *t, *x;
  if (sym == IF_SYM)  /* "if" <paren_expr> <statement> */
    { x = new_node(IF1);
      next_sym();
      x->o1 = paren_expr();
      x->o2 = statement();
      if (sym == ELSE_SYM)  /* ... "else" <statement> */
        { x->kind = IF2;
          next_sym();
          x->o3 = statement();
        }
    }
  else if (sym == WHILE_SYM)  /* "while" <paren_expr> <statement> */
    { x = new_node(WHILE);
      next_sym();
      x->o1 = paren_expr();
      x->o2 = statement();
    }
  else if (sym == DO_SYM)  /* "do" <statement> "while" <paren_expr> ";" */
    { x = new_node(DO);
      next_sym();
      x->o1 = statement();
      if (sym == WHILE_SYM) next_sym(); else syntax_error();
      x->o2 = paren_expr();
      if (sym == SEMI) next_sym(); else syntax_error();
    }
  else if (sym == SEMI)  /* ";" */
    { x = new_node(EMPTY); next_sym(); }
  else if (sym == LBRA)  /* "{" { <statement> } "}" */
    { x = new_node(EMPTY);
      next_sym();
      while (sym != RBRA)
        { t=x; x=new_node(SEQ); x->o1=t; x->o2=statement(); }
      next_sym();
    }
  else  /* <expr> ";" */
    { x = new_node(EXPR);
      x->o1 = expr();
      if (sym == SEMI) next_sym(); else syntax_error();
    }
  return x;
 }

 node *program()  /* <program> ::= <statement> */
 { node *x = new_node(PROG);
  next_sym(); x->o1 = statement(); if (sym != EOI) syntax_error();
  return x;
 }

 /*---------------------------------------------------------------------------*/

 /* Code generator. */

 enum { IFETCH, ISTORE, IPUSH, IPOP, IADD, ISUB, ILT, JZ, JNZ, JMP, HALT };

 typedef char code;
 code object[1000], *here = object;

 void g(code c) { *here++ = c; } /* missing overflow check */
 code *hole() { return here++; }
 void fix(code *src, code *dst) { *src = dst-src; } /* missing overflow check */

 void c(node *x)
 { code *p1, *p2;
  switch (x->kind)
    { case VAR  : g(IFETCH); g(x->val); break;
      case CST  : g(IPUSH); g(x->val); break;
      case ADD  : c(x->o1); c(x->o2); g(IADD); break;
      case SUB  : c(x->o1); c(x->o2); g(ISUB); break;
      case LT   : c(x->o1); c(x->o2); g(ILT); break;
      case SET  : c(x->o2); g(ISTORE); g(x->o1->val); break;
      case IF1  : c(x->o1); g(JZ); p1=hole(); c(x->o2); fix(p1,here); break;
      case IF2  : c(x->o1); g(JZ); p1=hole(); c(x->o2); g(JMP); p2=hole();
                  fix(p1,here); c(x->o3); fix(p2,here); break;
      case WHILE: p1=here; c(x->o1); g(JZ); p2=hole(); c(x->o2);
                  g(JMP); fix(hole(),p1); fix(p2,here); break;
      case DO   : p1=here; c(x->o1); c(x->o2); g(JNZ); fix(hole(),p1); break;
      case EMPTY: break;
      case SEQ  : c(x->o1); c(x->o2); break;
      case EXPR : c(x->o1); g(IPOP); break;
      case PROG : c(x->o1); g(HALT); break;
    }
 }

 /*---------------------------------------------------------------------------*/

 /* Virtual machine. */

 int globals[26];

 void run()
 { int stack[1000], *sp = stack;
  code *pc = object;
  again: switch (*pc++)
    { case IFETCH: *sp++ = globals[*pc++];               goto again;
      case ISTORE: globals[*pc++] = sp[-1];              goto again;
      case IPUSH : *sp++ = *pc++;                        goto again;
      case IPOP  : --sp;                                 goto again;
      case IADD  : sp[-2] = sp[-2] + sp[-1]; --sp;       goto again;
      case ISUB  : sp[-2] = sp[-2] - sp[-1]; --sp;       goto again;
      case ILT   : sp[-2] = sp[-2] < sp[-1]; --sp;       goto again;
      case JMP   : pc += *pc;                            goto again;
      case JZ    : if (*--sp == 0) pc += *pc; else pc++; goto again;
      case JNZ   : if (*--sp != 0) pc += *pc; else pc++; goto again;
    }
 }

 /*---------------------------------------------------------------------------*/

 /* Main program. */

 int main()
 { int i;

  c(program());

  for (i=0; i<26; i++)
    globals[i] = 0;
  run();
  for (i=0; i<26; i++)
    if (globals[i] != 0)
      printf("%c = %d\n", 'a'+i, globals[i]);

  return 0;
 }
	/* file: "tinyc.c" */
	/* originally from http://www.iro.umontreal.ca/~felipe/IFT2030-Automne2002/Complements/tinyc.c */

	/* Copyright (C) 2001 by Marc Feeley, All Rights Reserved. */

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/*
	* This is a compiler for the Tiny-C language. Tiny-C is a
	* considerably stripped down version of C and it is meant as a
	* pedagogical tool for learning about compilers. The integer global
	* variables "a" to "z" are predefined and initialized to zero, and it
	* is not possible to declare new variables. The compiler reads the
	* program from standard input and prints out the value of the
	* variables that are not zero. The grammar of Tiny-C in EBNF is:
	*
	* <program> ::= <statement>
	* <statement> ::= "if" <paren_expr> <statement> \|
	* "if" <paren_expr> <statement> "else" <statement> \|
	* "while" <paren_expr> <statement> \|
	* "do" <statement> "while" <paren_expr> ";" \|
	* "{" { <statement> } "}" \|
	* <expr> ";" \|
	* ";"
	* <paren_expr> ::= "(" <expr> ")"
	* <expr> ::= <test> \| <id> "=" <expr>
	* <test> ::= <sum> \| <sum> "<" <sum>
	* <sum> ::= <term> \| <sum> "+" <term> \| <sum> "-" <term>
	* <term> ::= <id> \| <int> \| <paren_expr>
	* <id> ::= "a" \| "b" \| "c" \| "d" \| ... \| "z"
	* <int> ::= <an_unsigned_decimal_integer>
	*
	* Here are a few invocations of the compiler:
	*
	* % echo "a=b=c=2<3;" \| ./a.out
	* a = 1
	* b = 1
	* c = 1
	* % echo "{ i=1; while (i<100) i=i+i; }" \| ./a.out
	* i = 128
	* % echo "{ i=125; j=100; while (i-j) if (i<j) j=j-i; else i=i-j; }" \| ./a.out
	* i = 25
	* j = 25
	* % echo "{ i=1; do i=i+10; while (i<50); }" \| ./a.out
	* i = 51
	* % echo "{ i=1; while ((i=i+10)<50) ; }" \| ./a.out
	* i = 51
	* % echo "{ i=7; if (i<5) x=1; if (i<10) y=2; }" \| ./a.out
	* i = 7
	* y = 2
	*
	* The compiler does a minimal amount of error checking to help
	* highlight the structure of the compiler.
	*/


	/---------------------------------------------------------------------------/

	/* Lexer. */

	enum { DO_SYM, ELSE_SYM, IF_SYM, WHILE_SYM, LBRA, RBRA, LPAR, RPAR,
	PLUS, MINUS, LESS, SEMI, EQUAL, INT, ID, EOI };

	char *words[] = { "do", "else", "if", "while", NULL };

	int ch = ' ';
	int sym;
	int int_val;
	char id_name[100];

	void syntax_error() { fprintf(stderr, "syntax error\n"); exit(1); }

	void next_ch() { ch = getchar(); }

	void next_sym()
	{ again: switch (ch)
	{ case ' ': case '\n': next_ch(); goto again;
	case EOF: sym = EOI; break;
	case '{': next_ch(); sym = LBRA; break;
	case '}': next_ch(); sym = RBRA; break;
	case '(': next_ch(); sym = LPAR; break;
	case ')': next_ch(); sym = RPAR; break;
	case '+': next_ch(); sym = PLUS; break;
	case '-': next_ch(); sym = MINUS; break;
	case '<': next_ch(); sym = LESS; break;
	case ';': next_ch(); sym = SEMI; break;
	case '=': next_ch(); sym = EQUAL; break;
	default:
	if (ch >= '0' && ch <= '9')
	{ int_val = 0; /* missing overflow check */
	while (ch >= '0' && ch <= '9')
	{ int_val = int_val*10 + (ch - '0'); next_ch(); }
	sym = INT;
	}
	else if (ch >= 'a' && ch <= 'z')
	{ int i = 0; /* missing overflow check */
	while ((ch >= 'a' && ch <= 'z') \|\| ch == '_')
	{ id_name[i++] = ch; next_ch(); }
	id_name[i] = '\0';
	sym = 0;
	while (words[sym] != NULL && strcmp(words[sym], id_name) != 0)
	sym++;
	if (words[sym] == NULL)
	if (id_name[1] == '\0') sym = ID; else syntax_error();
	}
	else
	syntax_error();
	}
	}

	/---------------------------------------------------------------------------/

	/* Parser. */

	enum { VAR, CST, ADD, SUB, LT, SET,
	IF1, IF2, WHILE, DO, EMPTY, SEQ, EXPR, PROG };

	struct node { int kind; struct node o1, o2, *o3; int val; };
	typedef struct node node;

	node *new_node(int k)
	{ node x = (node)malloc(sizeof(node)); x->kind = k; return x; }

	node paren_expr(); / forward declaration */

	node term() / <term> ::= <id> \| <int> \| <paren_expr> */
	{ node *x;
	if (sym == ID) { x=new_node(VAR); x->val=id_name[0]-'a'; next_sym(); }
	else if (sym == INT) { x=new_node(CST); x->val=int_val; next_sym(); }
	else x = paren_expr();
	return x;
	}

	node sum() / <sum> ::= <term> \| <sum> "+" <term> \| <sum> "-" <term> */
	{ node t, x = term();
	while (sym == PLUS \|\| sym == MINUS)
	{ t=x; x=new_node(sym==PLUS?ADD:SUB); next_sym(); x->o1=t; x->o2=term(); }
	return x;
	}

	node test() / <test> ::= <sum> \| <sum> "<" <sum> */
	{ node t, x = sum();
	if (sym == LESS)
	{ t=x; x=new_node(LT); next_sym(); x->o1=t; x->o2=sum(); }
	return x;
	}

	node expr() / <expr> ::= <test> \| <id> "=" <expr> */
	{ node t, x;
	if (sym != ID) return test();
	x = test();
	if (x->kind == VAR && sym == EQUAL)
	{ t=x; x=new_node(SET); next_sym(); x->o1=t; x->o2=expr(); }
	return x;
	}

	node paren_expr() / <paren_expr> ::= "(" <expr> ")" */
	{ node *x;
	if (sym == LPAR) next_sym(); else syntax_error();
	x = expr();
	if (sym == RPAR) next_sym(); else syntax_error();
	return x;
	}

	node *statement()
	{ node t, x;
	if (sym == IF_SYM) /* "if" <paren_expr> <statement> */
	{ x = new_node(IF1);
	next_sym();
	x->o1 = paren_expr();
	x->o2 = statement();
	if (sym == ELSE_SYM) /* ... "else" <statement> */
	{ x->kind = IF2;
	next_sym();
	x->o3 = statement();
	}
	}
	else if (sym == WHILE_SYM) /* "while" <paren_expr> <statement> */
	{ x = new_node(WHILE);
	next_sym();
	x->o1 = paren_expr();
	x->o2 = statement();
	}
	else if (sym == DO_SYM) /* "do" <statement> "while" <paren_expr> ";" */
	{ x = new_node(DO);
	next_sym();
	x->o1 = statement();
	if (sym == WHILE_SYM) next_sym(); else syntax_error();
	x->o2 = paren_expr();
	if (sym == SEMI) next_sym(); else syntax_error();
	}
	else if (sym == SEMI) /* ";" */
	{ x = new_node(EMPTY); next_sym(); }
	else if (sym == LBRA) /* "{" { <statement> } "}" */
	{ x = new_node(EMPTY);
	next_sym();
	while (sym != RBRA)
	{ t=x; x=new_node(SEQ); x->o1=t; x->o2=statement(); }
	next_sym();
	}
	else /* <expr> ";" */
	{ x = new_node(EXPR);
	x->o1 = expr();
	if (sym == SEMI) next_sym(); else syntax_error();
	}
	return x;
	}

	node program() / <program> ::= <statement> */
	{ node *x = new_node(PROG);
	next_sym(); x->o1 = statement(); if (sym != EOI) syntax_error();
	return x;
	}

	/---------------------------------------------------------------------------/

	/* Code generator. */

	enum { IFETCH, ISTORE, IPUSH, IPOP, IADD, ISUB, ILT, JZ, JNZ, JMP, HALT };

	typedef char code;
	code object[1000], *here = object;

	void g(code c) { here++ = c; } / missing overflow check */
	code *hole() { return here++; }
	void fix(code src, code dst) { src = dst-src; } / missing overflow check */

	void c(node *x)
	{ code p1, p2;
	switch (x->kind)
	{ case VAR : g(IFETCH); g(x->val); break;
	case CST : g(IPUSH); g(x->val); break;
	case ADD : c(x->o1); c(x->o2); g(IADD); break;
	case SUB : c(x->o1); c(x->o2); g(ISUB); break;
	case LT : c(x->o1); c(x->o2); g(ILT); break;
	case SET : c(x->o2); g(ISTORE); g(x->o1->val); break;
	case IF1 : c(x->o1); g(JZ); p1=hole(); c(x->o2); fix(p1,here); break;
	case IF2 : c(x->o1); g(JZ); p1=hole(); c(x->o2); g(JMP); p2=hole();
	fix(p1,here); c(x->o3); fix(p2,here); break;
	case WHILE: p1=here; c(x->o1); g(JZ); p2=hole(); c(x->o2);
	g(JMP); fix(hole(),p1); fix(p2,here); break;
	case DO : p1=here; c(x->o1); c(x->o2); g(JNZ); fix(hole(),p1); break;
	case EMPTY: break;
	case SEQ : c(x->o1); c(x->o2); break;
	case EXPR : c(x->o1); g(IPOP); break;
	case PROG : c(x->o1); g(HALT); break;
	}
	}

	/---------------------------------------------------------------------------/

	/* Virtual machine. */

	int globals[26];

	void run()
	{ int stack[1000], *sp = stack;
	code *pc = object;
	again: switch (*pc++)
	{ case IFETCH: sp++ = globals[pc++]; goto again;
	case ISTORE: globals[*pc++] = sp[-1]; goto again;
	case IPUSH : sp++ = pc++; goto again;
	case IPOP : --sp; goto again;
	case IADD : sp[-2] = sp[-2] + sp[-1]; --sp; goto again;
	case ISUB : sp[-2] = sp[-2] - sp[-1]; --sp; goto again;
	case ILT : sp[-2] = sp[-2] < sp[-1]; --sp; goto again;
	case JMP : pc += *pc; goto again;
	case JZ : if (--sp == 0) pc += pc; else pc++; goto again;
	case JNZ : if (--sp != 0) pc += pc; else pc++; goto again;
	}
	}

	/---------------------------------------------------------------------------/

	/* Main program. */

	int main()
	{ int i;

	c(program());

	for (i=0; i<26; i++)
	globals[i] = 0;
	run();
	for (i=0; i<26; i++)
	if (globals[i] != 0)
	printf("%c = %d\n", 'a'+i, globals[i]);

	return 0;
	}
No results found