Skip to content

Instantly share code, notes, and snippets.

@bencz
Last active December 15, 2015 00:39
Show Gist options
  • Save bencz/5174756 to your computer and use it in GitHub Desktop.
Save bencz/5174756 to your computer and use it in GitHub Desktop.
A test of compiler
using namespace System;
/* <stmt> := var <ident> = <expr>
| <ident> = <expr>
| for <ident> = <expr> to <expr> do <stmt> end
| read_int <ident>
| print <expr>
| <stmt> ; <stmt>
*/
// Abstract root of every statement node in the AST; it carries no data of
// its own and exists so that statements share a common handle type.
public ref class Stmt abstract {};
/* <expr> := <string>
* | <int>
* | <bin_expr>
* | <ident>
*/
// Abstract root of every expression node in the AST; it carries no data of
// its own and exists so that expressions share a common handle type.
public ref class Expr abstract {};
// <bin_op> := + | - | * | /
// Operator tag stored in a binary expression node, one enumerator per
// arithmetic operator of the grammar.
public enum class BinOp
{
    Add,    // +
    Sub,    // -
    Mul,    // *
    Div     // /
};
// var <ident> = <expr>
// Statement node: declares a variable and gives it an initial value.
public ref class DeclareVar : Stmt
{
public:
    // Name of the variable being declared.
    String^ Ident;

    // Expression whose value initialises the variable.
    Expr^ Expr;
};
// print <expr>
// Statement node: prints the value of a single expression.
public ref class Print : Stmt
{
public:
    // Expression whose value is printed.
    Expr^ Expr;
};
// <ident> = <expr>
// Statement node: stores the value of an expression into a named variable.
public ref class Assign : Stmt
{
public:
    // Name of the variable that receives the value.
    String^ Ident;

    // Expression producing the value to store.
    Expr^ Expr;
};
// <stmt> ; <stmt>
// Statement node: two statements joined by ';'.
public ref class Sequence : Stmt
{
public:
    // Statement on the left of the ';'.
    Stmt^ First;

    // Statement on the right of the ';'.
    Stmt^ Second;
};
// <string> := " <string_elem>* "
// Expression node: a string literal.
public ref class StringLiteral : Expr
{
public:
    // Text of the literal.
    String^ Value;
};
// <int> := <digit>+
// Expression node: an integer literal.
public ref class IntLiteral : Expr
{
public:
    // Numeric value of the literal.
    int Value;
};
// <ident> := <char> <ident_rest>*
// <ident_rest> := <char> | <digit>
// Expression node: a reference to a variable by name.
public ref class Variable : Expr
{
public:
    // Name of the referenced variable.
    String^ Ident;
};
// <bin_expr> := <expr> <bin_op> <expr>
// Expression node: a binary operation applied to two operand expressions.
public ref class BinExpr : Expr
{
public:
    // Operand on the left of the operator.
    Expr^ Left;

    // Operand on the right of the operator.
    Expr^ Right;

    // Which operator joins the two operands.
    BinOp Op;
};
#include "AST.h"
using namespace System;
using namespace System::IO;
using namespace System::Text;
using namespace System::Reflection;
using namespace System::Reflection::Emit;
using namespace System::Collections::Generic;
// Lexer: reads characters from a TextReader and appends one token object per
// lexeme to a token list.  Tokens are represented as follows:
//   identifier / keyword -> String^
//   string literal       -> StringBuilder^ (deliberately a different type
//                           from identifiers so the two can be told apart)
//   integer literal      -> boxed int
//   + - * / ; =          -> the static sentinel objects declared below
// Scanning happens entirely inside the constructors.
public ref class Scanner sealed
{
private:
    // Tokens produced so far, in source order.
    initonly IList<Object^>^ result;

public:
    // Scans all of 'input' into a freshly created token list.
    // Throws System::Exception on an unterminated string literal or an
    // unrecognized character; failures of int::Parse propagate unchanged.
    Scanner(IO::TextReader^ input)
    {
        result = gcnew List<Object^>();
        Scan(input);
    }

    // Scans all of 'input', appending the tokens to the existing list
    // 'tokens' (which then also becomes this scanner's Tokens list).
    // Throws the same exceptions as the single-argument constructor.
    Scanner(IO::TextReader^ input, IList<Object^>^ tokens)
    {
        result = tokens;
        Scan(input);
    }

    // The token list this scanner wrote into.
    property IList<Object^>^ Tokens
    {
        System::Collections::Generic::IList<Object^>^ get()
        {
            return result;
        }
    }

    // Sentinel tokens for the punctuation characters.  They are plain
    // Object instances, so they can only be recognised by identity.
    static initonly Object^ Add = gcnew Object();
    static initonly Object^ Sub = gcnew Object();
    static initonly Object^ Mul = gcnew Object();
    static initonly Object^ Div = gcnew Object();
    static initonly Object^ Semi = gcnew Object();
    static initonly Object^ Equal = gcnew Object();

private:
    // Consumes 'input' to the end, appending tokens to 'result'.
    void Scan(TextReader^ input)
    {
        while (input->Peek() != -1)
        {
            Char ch = safe_cast<Char>(input->Peek());

            if (Char::IsWhiteSpace(ch))
            {
                // Whitespace only separates tokens.
                input->Read();
            }
            else if (Char::IsLetter(ch) || ch == '_')
            {
                // Identifier or keyword.  The first character is a letter or
                // '_'; following characters may also be digits, as the
                // grammar's <ident_rest> allows (previously "x1" was split
                // into the identifier "x" and the integer 1).
                StringBuilder^ accum = gcnew StringBuilder();
                while (Char::IsLetterOrDigit(ch) || ch == '_')
                {
                    accum->Append(ch);
                    input->Read();
                    if (input->Peek() == -1)
                        break;
                    ch = safe_cast<Char>(input->Peek());
                }
                result->Add(accum->ToString());
            }
            else if (ch == '"')
            {
                // String literal: everything up to the next '"'.  There is
                // no escape mechanism.
                StringBuilder^ accum = gcnew StringBuilder();
                input->Read(); // skip the opening '"'
                if (input->Peek() == -1)
                    throw gcnew System::Exception("unterminated string literal");
                while ((ch = safe_cast<Char>(input->Peek())) != '"')
                {
                    accum->Append(ch);
                    input->Read();
                    if (input->Peek() == -1)
                        throw gcnew System::Exception("unterminated string literal");
                }
                input->Read(); // skip the closing '"'
                // The StringBuilder itself is the token (not its string), so
                // that a literal never looks like an identifier token.
                result->Add(accum);
            }
            else if (Char::IsDigit(ch))
            {
                // Integer literal: a maximal run of digits.
                StringBuilder^ accum = gcnew StringBuilder();
                while (Char::IsDigit(ch))
                {
                    accum->Append(ch);
                    input->Read();
                    if (input->Peek() == -1)
                        break;
                    ch = safe_cast<Char>(input->Peek());
                }
                result->Add(int::Parse(accum->ToString()));
            }
            else
            {
                // Single-character punctuation.
                switch (ch)
                {
                case '+':
                    input->Read();
                    result->Add(Scanner::Add);
                    break;
                case '-':
                    input->Read();
                    result->Add(Scanner::Sub);
                    break;
                case '*':
                    input->Read();
                    result->Add(Scanner::Mul);
                    break;
                case '/':
                    input->Read();
                    result->Add(Scanner::Div);
                    break;
                case '=':
                    input->Read();
                    result->Add(Scanner::Equal);
                    break;
                case ';':
                    input->Read();
                    result->Add(Scanner::Semi);
                    break;
                default:
                    throw gcnew System::Exception("Scanner encountered unrecognized character '" + ch + "'");
                }
            }
        }
    }
};
// Recursive-descent parser: turns a token list into a statement tree.
// Expected token representation: String^ for identifiers and keywords,
// StringBuilder^ for string literals, boxed int for integer literals, and
// the Scanner::Equal / Scanner::Semi sentinel objects for '=' and ';'.
// Parsing happens entirely inside the constructor.
public ref class Parser sealed
{
private:
    // Position of the next unconsumed token in 'tokens'.
    int index;
    IList<Object^>^ tokens;
    // Root of the parsed program.
    initonly Stmt^ result;

public:
    // Parses the whole of 'tok'.
    // Throws System::Exception on any syntax error, including tokens left
    // over after a complete statement ("expected EOF").
    Parser(IList<Object^>^ tok)
    {
        tokens = tok;
        index = 0;
        result = ParseStmt();
        if (index != tokens->Count)
            throw gcnew System::Exception("expected EOF");
    }

    // The parsed program.  May be nullptr when the input consisted only of
    // an empty statement (a lone ';').
    property Stmt^ Result
    {
        Stmt^ get()
        {
            return result;
        }
    }

private:
    // Parses one statement and, if it is followed by ';' and further
    // tokens, the rest of the sequence.
    //   print <expr>
    //   var <ident> = <expr>
    //   <ident> = <expr>
    //   <stmt> ; <stmt>
    // Returns nullptr for an empty statement (current token is ';').
    Stmt^ ParseStmt()
    {
        if (index == tokens->Count)
            throw gcnew Exception("expected statement, got EOF");

        Stmt^ stmt = nullptr;
        Object^ token = tokens[index];

        if (token->Equals("print"))
        {
            index++;
            Print^ print = gcnew Print();
            print->Expr = ParseExpr();
            stmt = print;
        }
        else if (token->Equals("var"))
        {
            index++;
            DeclareVar^ declareVar = gcnew DeclareVar();
            if (index < tokens->Count && dynamic_cast<String^>(tokens[index]) != nullptr)
                declareVar->Ident = (String^)tokens[index];
            else
                throw gcnew Exception("Expected variable name after 'var'");
            index++;
            if (index == tokens->Count || tokens[index] != Scanner::Equal)
                throw gcnew Exception("Expected = after 'var identification'");
            index++;
            declareVar->Expr = ParseExpr();
            stmt = declareVar;
        }
        else if (dynamic_cast<String^>(token) != nullptr)
        {
            // Any other identifier starts an assignment: <ident> = <expr>.
            Assign^ assign = gcnew Assign();
            assign->Ident = safe_cast<String^>(token);
            index++;
            if (index == tokens->Count || tokens[index] != Scanner::Equal)
                throw gcnew Exception("Expected = after variable name '" + assign->Ident + "'");
            index++;
            assign->Expr = ParseExpr();
            stmt = assign;
        }
        else if (token != Scanner::Semi)
        {
            // Report the offending token here rather than returning a null
            // statement and failing later with an unrelated message.  A bare
            // ';' is still let through as an empty statement, as before.
            throw gcnew Exception(String::Format("parse error at token {0}: expected a statement", index));
        }

        if (index < tokens->Count && tokens[index] == Scanner::Semi)
        {
            index++;
            // A trailing ';' (nothing after it, or "end") does not start
            // another statement.
            if (index < tokens->Count && !tokens[index]->Equals("end"))
            {
                Sequence^ seq = gcnew Sequence();
                seq->First = stmt;
                seq->Second = ParseStmt();
                stmt = seq;
            }
        }
        return stmt;
    }

    // Parses one expression: a string literal, an integer literal or a
    // variable reference.  Throws System::Exception for anything else.
    Expr^ ParseExpr()
    {
        if (index == tokens->Count)
            throw gcnew Exception("expected expression, got EOF");

        Object^ token = tokens[index];

        if (dynamic_cast<StringBuilder^>(token) != nullptr)
        {
            // String literals arrive as StringBuilder tokens.
            index++;
            StringLiteral^ stringLiteral = gcnew StringLiteral();
            stringLiteral->Value = token->ToString();
            return stringLiteral;
        }
        else if (dynamic_cast<String^>(token) != nullptr)
        {
            index++;
            Variable^ variable = gcnew Variable();
            variable->Ident = safe_cast<String^>(token);
            return variable;
        }
        else if (token->GetType() == int::typeid)
        {
            // Integer literals arrive as boxed ints.
            index++;
            IntLiteral^ intLiteral = gcnew IntLiteral();
            intLiteral->Value = safe_cast<int>(token);
            return intLiteral;
        }
        else
            throw gcnew Exception("expected string literal, int literal or variable");
    }
};
// Code generator: compiles a statement tree into a saved .NET assembly using
// System::Reflection::Emit.  All work happens inside the constructor; the
// object holds no usable state afterwards.
ref class CodeGen sealed
{
private:
    // IL stream of the generated Main method (only set during construction).
    ILGenerator^ il;
    // Declared variables, by name, mapped to their IL locals.
    Dictionary<String^, LocalBuilder^>^ symbolTable;

public:
    // Generates an assembly named after 'moduleName' containing a type "Foo"
    // with a static Main method holding the code for 'stmt', marks Main as
    // the entry point and saves the assembly as 'moduleName'.
    // 'moduleName' must be a bare file name (no directory part).
    // Throws System::Exception for a module name with a path, for undeclared
    // variables, for type mismatches and for unsupported AST nodes.
    CodeGen(Stmt^ stmt, String^ moduleName)
    {
        if(Path::GetFileName(moduleName) != moduleName)
            throw gcnew Exception("can only output into current directory");

        AssemblyName^ name = gcnew AssemblyName(Path::GetFileNameWithoutExtension(moduleName));
        AssemblyBuilder^ asmb = AppDomain::CurrentDomain->DefineDynamicAssembly(name, AssemblyBuilderAccess::Save);
        ModuleBuilder^ modb = asmb->DefineDynamicModule(moduleName);
        TypeBuilder^ typeBuilder = modb->DefineType("Foo");
        MethodBuilder^ methb = typeBuilder->DefineMethod("Main", MethodAttributes::Static, void::typeid, Type::EmptyTypes);

        il = methb->GetILGenerator();
        symbolTable = gcnew Dictionary<String^, LocalBuilder^>();

        GenStmt(stmt);
        il->Emit(OpCodes::Ret);

        typeBuilder->CreateType();
        modb->CreateGlobalFunctions();
        asmb->SetEntryPoint(methb);
        asmb->Save(moduleName);

        // Generation is finished; drop the per-compilation state.
        symbolTable = nullptr;
        il = nullptr;
    }

private:
    // Emits IL for one statement (recursively for sequences).
    // A null statement emits nothing; any other unsupported node type is
    // reported instead of being skipped silently.
    void GenStmt(Stmt^ stmt)
    {
        if(dynamic_cast<Sequence^>(stmt) != nullptr)
        {
            Sequence^ seq = (Sequence^)stmt;
            GenStmt(seq->First);
            GenStmt(seq->Second);
        }
        else if(dynamic_cast<Print^>(stmt) != nullptr)
        {
            // Console::WriteLine(String): the operand is coerced to a string.
            GenExpr(dynamic_cast<Print^>(stmt)->Expr, String::typeid);
            il->Emit(OpCodes::Call, System::Console::typeid->GetMethod("WriteLine", gcnew array<System::Type^> {String::typeid}));
        }
        else if(dynamic_cast<DeclareVar^>(stmt) != nullptr)
        {
            // Declare a local of the initialiser's type, then reuse the
            // assignment path to store the initial value.
            DeclareVar^ declare = dynamic_cast<DeclareVar^>(stmt);
            symbolTable[declare->Ident] = il->DeclareLocal(TypeOfExpr(declare->Expr));
            Assign^ assign = gcnew Assign();
            assign->Ident = declare->Ident;
            assign->Expr = declare->Expr;
            GenStmt(assign);
        }
        else if(dynamic_cast<Assign^>(stmt) != nullptr)
        {
            Assign^ assign = dynamic_cast<Assign^>(stmt);
            Type^ valueType = TypeOfExpr(assign->Expr);
            GenExpr(assign->Expr, valueType);
            Store(assign->Ident, valueType);
        }
        else if(stmt != nullptr)
            throw gcnew Exception("don't know how to generate a " + stmt->GetType()->Name);
    }

    // Emits a store of the value on top of the stack into variable 'name'.
    // Throws if the variable is undeclared or its type differs from 'type'.
    void Store(String^ name, Type^ type)
    {
        if(!symbolTable->ContainsKey(name))
            throw gcnew Exception("undeclared variable '" + name + "'");

        LocalBuilder^ locb = symbolTable[name];
        if(locb->LocalType != type)
            throw gcnew Exception("'" + name + "' is of type " + locb->LocalType->Name + " but attempted to store value of type " + type->Name);

        il->Emit(OpCodes::Stloc, locb);
    }

    // Emits IL that pushes the value of 'expr', converted to 'expectedType'.
    // The only supported conversion is int -> String.
    // Throws for undeclared variables, unsupported expression nodes and
    // impossible conversions.
    void GenExpr(Expr^ expr, Type^ expectedType)
    {
        Type^ deliveredType;
        if(dynamic_cast<StringLiteral^>(expr) != nullptr)
        {
            deliveredType = String::typeid;
            il->Emit(OpCodes::Ldstr, dynamic_cast<StringLiteral^>(expr)->Value);
        }
        else if(dynamic_cast<IntLiteral^>(expr) != nullptr)
        {
            deliveredType = int::typeid;
            il->Emit(OpCodes::Ldc_I4, dynamic_cast<IntLiteral^>(expr)->Value);
        }
        else if(dynamic_cast<Variable^>(expr) != nullptr)
        {
            String^ ident = dynamic_cast<Variable^>(expr)->Ident;
            // TypeOfExpr throws "undeclared variable" when the name is not
            // in the symbol table, so the lookup below cannot miss.
            deliveredType = TypeOfExpr(expr);
            il->Emit(OpCodes::Ldloc, symbolTable[ident]);
        }
        else
        {
            // Without this, deliveredType would stay null and the coercion
            // check below would fail with a null dereference.
            throw gcnew Exception("don't know how to generate " + expr->GetType()->Name);
        }

        if(deliveredType != expectedType)
        {
            if(deliveredType == int::typeid &&
                expectedType == String::typeid)
            {
                // int -> String: box the value and call Object::ToString.
                il->Emit(OpCodes::Box, int::typeid);
                il->Emit(OpCodes::Callvirt, Object::typeid->GetMethod("ToString"));
            }
            else
                throw gcnew Exception("Can't coerce a " + deliveredType->Name + " to a " + expectedType->Name);
        }
    }

    // Returns the static type of 'expr': String for string literals, int
    // for integer literals, the declared local type for variables.
    // Throws for undeclared variables and unsupported expression nodes.
    Type^ TypeOfExpr(Expr^ expr)
    {
        if(dynamic_cast<StringLiteral^>(expr) != nullptr)
        {
            return String::typeid;
        }
        else if(dynamic_cast<IntLiteral^>(expr) != nullptr)
        {
            return int::typeid;
        }
        else if(dynamic_cast<Variable^>(expr) != nullptr)
        {
            Variable^ variable = dynamic_cast<Variable^>(expr);
            if(symbolTable->ContainsKey(variable->Ident))
            {
                LocalBuilder^ locb = symbolTable[variable->Ident];
                return locb->LocalType;
            }
            else
                throw gcnew Exception("undeclared variable '" + variable->Ident + "'");
        }
        else
            throw gcnew Exception("don't know how to calculate the type of " + expr->GetType()->Name);
    }
};
// Entry point.  Every command-line argument is a source file; the files are
// scanned in order into one token list, parsed as a single program and
// compiled to "<first file name without extension>.exe" in the current
// directory.
// Returns 0 on success and 1 after printing the error message to stderr.
int main(array<System::String^ >^ args)
{
    try
    {
        if(args->Length == 0)
            throw gcnew Exception("no input file given; usage: <program> <source-file> [<source-file> ...]");

        // Scan every file (the original skipped the second file when exactly
        // two were given), appending to the same token list.
        IList<Object^>^ tokens = nullptr;
        for(int i = 0; i < args->Length; i++)
        {
            TextReader^ input = File::OpenText(args[i]);
            try
            {
                Scanner^ scanner = (tokens == nullptr)
                    ? gcnew Scanner(input)
                    : gcnew Scanner(input, tokens);
                tokens = scanner->Tokens;
            }
            finally
            {
                // The scanner consumes the reader in its constructor, so the
                // file can be closed as soon as scanning is done.
                delete input;
            }
        }

        Parser^ parser = gcnew Parser(tokens);
        // Constructing the CodeGen performs the compilation and saves the
        // assembly; the object itself is not needed afterwards.
        gcnew CodeGen(parser->Result, Path::GetFileNameWithoutExtension(args[0])+".exe");
    }
    catch(Exception^ ex)
    {
        Console::Error->WriteLine(ex->Message);
        return 1;
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment