Last active
December 15, 2015 00:39
-
-
Save bencz/5174756 to your computer and use it in GitHub Desktop.
A test of compiler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using namespace System; | |
/* <stmt> := var <ident> = <expr> | |
| <ident> = <expr> | |
| for <ident> = <expr> to <expr> do <stmt> end | |
| read_int <ident> | |
| print <expr> | |
| <stmt> ; <stmt> | |
*/ | |
// Abstract root of the statement hierarchy. It declares no members; the
// concrete statement kinds derive from it so they can be held as Stmt^.
public ref class Stmt abstract
{
};
/* <expr> := <string>
 *         | <int>
 *         | <bin_expr>
 *         | <ident>
 */
// Abstract root of the expression hierarchy. It declares no members; the
// concrete expression kinds derive from it so they can be held as Expr^.
public ref class Expr abstract
{
};
// <bin_op> := + | - | * | / | |
// Operator tag stored in a BinExpr node.
public enum class BinOp
{
    Add,    // +
    Sub,    // -
    Mul,    // *
    Div     // /
};
// var <ident> = <expr> | |
// Statement node for a variable declaration with an initializer.
public ref class DeclareVar : Stmt
{
public:
    // Name of the variable being declared.
    String^ Ident;

    // Initializer expression on the right-hand side of '='.
    Expr^ Expr;
};
// print <expr> | |
// Statement node for 'print'.
public ref class Print : Stmt
{
public:
    // Expression whose value is printed.
    Expr^ Expr;
};
// <ident> = <expr> | |
// Statement node for an assignment to a named variable.
public ref class Assign : Stmt
{
public:
    // Name of the variable being assigned.
    String^ Ident;

    // Expression producing the value to store.
    Expr^ Expr;
};
// <stmt> ; <stmt> | |
// Statement node chaining two statements separated by ';'.
public ref class Sequence : Stmt
{
public:
    // Statement on the left of the ';'.
    Stmt^ First;

    // Statement on the right of the ';'.
    Stmt^ Second;
};
// <string> := " <string_elem>* " | |
// Expression node for a string literal.
public ref class StringLiteral : Expr
{
public:
    // Literal text.
    String^ Value;
};
// <int> := <digit>+ | |
// Expression node for an integer literal.
public ref class IntLiteral : Expr
{
public:
    // Numeric value of the literal.
    int Value;
};
// <ident> := <char> <ident_rest>* | |
// <ident_rest> := <char> | <digit> | |
// Expression node that refers to a variable by name.
public ref class Variable : Expr
{
public:
    // Name of the referenced variable.
    String^ Ident;
};
// <bin_expr> := <expr> <bin_op> <expr> | |
// Expression node for a binary operation: Left Op Right.
public ref class BinExpr : Expr
{
public:
    // Left-hand operand.
    Expr^ Left;

    // Right-hand operand.
    Expr^ Right;

    // Operator applied to the two operands.
    BinOp Op;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "AST.h" | |
using namespace System; | |
using namespace System::IO; | |
using namespace System::Text; | |
using namespace System::Reflection; | |
using namespace System::Reflection::Emit; | |
using namespace System::Collections::Generic; | |
// Lexer: reads characters from a TextReader and appends tokens to a list.
//
// Token representation (as consumed by Parser):
//   String^          identifier or keyword
//   StringBuilder^   contents of a string literal (a distinct runtime type is
//                    used so literals can be told apart from identifiers)
//   boxed int        integer literal
//   Scanner::Add / Sub / Mul / Div / Semi / Equal
//                    unique sentinel objects for punctuation, compared by
//                    reference
public ref class Scanner sealed
{
private:
    initonly IList<Object^>^ result;

public:
    // Scans `input` into a fresh token list.
    // Throws System::Exception on an unterminated string literal or an
    // unrecognized character.
    Scanner(IO::TextReader^ input)
    {
        result = gcnew List<Object^>();
        Scan(input);
    }

    // Scans `input` and appends its tokens to the caller-supplied `tokens`
    // list, so that several inputs can be concatenated into one stream.
    Scanner(IO::TextReader^ input, IList<Object^>^ tokens)
    {
        result = tokens;
        Scan(input);
    }

    // The token list this scanner wrote into.
    property IList<Object^>^ Tokens
    {
        System::Collections::Generic::IList<Object^>^ get()
        {
            return result;
        }
    }

    // Sentinel tokens for operators and punctuation.
    static initonly Object^ Add = gcnew Object();
    static initonly Object^ Sub = gcnew Object();
    static initonly Object^ Mul = gcnew Object();
    static initonly Object^ Div = gcnew Object();
    static initonly Object^ Semi = gcnew Object();
    static initonly Object^ Equal = gcnew Object();

private:
    // True for a character that may start an identifier.
    static bool IsIdentStart(Char ch)
    {
        return Char::IsLetter(ch) || ch == '_';
    }

    // True for a character that may continue an identifier. Digits are
    // accepted here (but not as the first character), matching
    // <ident_rest> := <char> | <digit>.
    static bool IsIdentRest(Char ch)
    {
        return Char::IsLetterOrDigit(ch) || ch == '_';
    }

    // Main loop: dispatches on the next character until end of input.
    void Scan(TextReader^ input)
    {
        while (input->Peek() != -1)
        {
            Char ch = safe_cast<Char>(input->Peek());
            if (Char::IsWhiteSpace(ch))
                input->Read();
            else if (IsIdentStart(ch))
                result->Add(ScanIdentifier(input));
            else if (ch == '"')
                result->Add(ScanStringLiteral(input));
            else if (Char::IsDigit(ch))
                result->Add(ScanInt(input));
            else
                result->Add(ScanSymbol(input, ch));
        }
    }

    // Consumes an identifier/keyword; the reader is positioned on its first
    // character.
    static String^ ScanIdentifier(TextReader^ input)
    {
        StringBuilder^ accum = gcnew StringBuilder();
        while (input->Peek() != -1)
        {
            Char ch = safe_cast<Char>(input->Peek());
            if (!IsIdentRest(ch))
                break;
            accum->Append(ch);
            input->Read();
        }
        return accum->ToString();
    }

    // Consumes a double-quoted string literal (no escape sequences) and
    // returns its contents without the quotes.
    static StringBuilder^ ScanStringLiteral(TextReader^ input)
    {
        StringBuilder^ accum = gcnew StringBuilder();
        input->Read(); // skip the opening '"'
        for (;;)
        {
            int next = input->Read();
            if (next == -1)
                throw gcnew System::Exception("unterminated string literal");
            Char ch = safe_cast<Char>(next);
            if (ch == '"')
                break; // closing quote consumed
            accum->Append(ch);
        }
        return accum;
    }

    // Consumes a run of digits and returns its integer value.
    static int ScanInt(TextReader^ input)
    {
        StringBuilder^ accum = gcnew StringBuilder();
        while (input->Peek() != -1)
        {
            Char ch = safe_cast<Char>(input->Peek());
            if (!Char::IsDigit(ch))
                break;
            accum->Append(ch);
            input->Read();
        }
        return int::Parse(accum->ToString());
    }

    // Maps a single punctuation character to its sentinel token and consumes
    // it. Throws on any character the language does not know; in that case
    // nothing is consumed.
    static Object^ ScanSymbol(TextReader^ input, Char ch)
    {
        Object^ token;
        switch (ch)
        {
        case '+': token = Scanner::Add;   break;
        case '-': token = Scanner::Sub;   break;
        case '*': token = Scanner::Mul;   break;
        case '/': token = Scanner::Div;   break;
        case '=': token = Scanner::Equal; break;
        case ';': token = Scanner::Semi;  break;
        default:
            throw gcnew System::Exception("Scanner encountered unrecognized character '" + ch + "'");
        }
        input->Read();
        return token;
    }
};
// Recursive-descent parser over the token list produced by Scanner.
//
// Supported statements:  print <expr>
//                        var <ident> = <expr>
//                        <ident> = <expr>
//                        <stmt> ; <stmt>
// Supported expressions: string literal, int literal, variable reference.
//
// The whole token list is parsed in the constructor; the resulting tree is
// available through Result. Any syntax error is reported by throwing
// System::Exception.
public ref class Parser sealed
{
private:
    int index;                  // position of the next unconsumed token
    IList<Object^>^ tokens;
    initonly Stmt^ result;

public:
    Parser(IList<Object^>^ tok)
    {
        tokens = tok;
        index = 0;
        result = ParseStmt();
        // Everything must have been consumed by the top-level statement.
        if (this->index != this->tokens->Count)
            throw gcnew System::Exception("expected EOF");
    }

    // Root of the parsed statement tree.
    property Stmt^ Result
    {
        Stmt^ get()
        {
            return result;
        }
    }

private:
    // Parses one statement and, if it is followed by ';' and more input,
    // the rest of the sequence.
    Stmt^ ParseStmt()
    {
        if (index == tokens->Count)
            throw gcnew Exception("expected statement, got EOF");

        Stmt^ stmt;
        if (tokens[index]->Equals("print"))
        {
            index++;
            Print^ print = gcnew Print();
            print->Expr = ParseExpr();
            stmt = print;
        }
        else if (tokens[index]->Equals("var"))
        {
            index++;
            DeclareVar^ declareVar = gcnew DeclareVar();
            if (index < tokens->Count && dynamic_cast<String^>(tokens[index]) != nullptr)
                declareVar->Ident = safe_cast<String^>(tokens[index]);
            else
                throw gcnew Exception("Expected variable name after 'var'");
            index++;
            if (index == tokens->Count || tokens[index] != Scanner::Equal)
                throw gcnew Exception("Expected = after 'var identification'");
            index++;
            declareVar->Expr = ParseExpr();
            stmt = declareVar;
        }
        else if (dynamic_cast<String^>(tokens[index]) != nullptr &&
                 index + 1 < tokens->Count && tokens[index + 1] == Scanner::Equal)
        {
            // <ident> = <expr>
            Assign^ assign = gcnew Assign();
            assign->Ident = safe_cast<String^>(tokens[index]);
            index += 2; // identifier and '='
            assign->Expr = ParseExpr();
            stmt = assign;
        }
        else
        {
            // Previously this case fell through and returned a null
            // statement without consuming the token.
            throw gcnew Exception("parse error at token " + index + ": " + tokens[index]);
        }

        if (index < tokens->Count && tokens[index] == Scanner::Semi)
        {
            index++;
            // A trailing ';' (or one followed by "end") does not start a
            // further statement.
            if (index < tokens->Count && !tokens[index]->Equals("end"))
            {
                Sequence^ seq = gcnew Sequence();
                seq->First = stmt;
                seq->Second = ParseStmt();
                stmt = seq;
            }
        }
        return stmt;
    }

    // Parses a single primary expression. The token's runtime type selects
    // the node kind: StringBuilder -> string literal, boxed int -> int
    // literal, String -> variable reference.
    Expr^ ParseExpr()
    {
        if (index == tokens->Count)
            throw gcnew Exception("expected expression, got EOF");

        Object^ tok = tokens[index];
        if (dynamic_cast<StringBuilder^>(tok) != nullptr)
        {
            index++;
            StringLiteral^ stringLiteral = gcnew StringLiteral();
            stringLiteral->Value = safe_cast<StringBuilder^>(tok)->ToString();
            return stringLiteral;
        }
        else if (tok->GetType() == int::typeid)
        {
            index++;
            IntLiteral^ intLiteral = gcnew IntLiteral();
            intLiteral->Value = safe_cast<int>(tok);
            return intLiteral;
        }
        else if (dynamic_cast<String^>(tok) != nullptr)
        {
            index++;
            Variable^ var = gcnew Variable();
            var->Ident = safe_cast<String^>(tok);
            return var;
        }
        else
            throw gcnew Exception("expected string literal, int literal or variable");
    }
};
// Code generator: walks a statement tree and emits IL through
// System::Reflection::Emit into a static method "Main" on a type "Foo",
// then saves the assembly under the given module name. All of the work
// happens in the constructor. Semantic errors (undeclared variables, type
// mismatches, unsupported nodes) are reported by throwing System::Exception.
ref class CodeGen sealed
{
private:
    ILGenerator^ il;
    // Maps each declared variable name to its IL local.
    Dictionary<String^, LocalBuilder^>^ symbolTable;

public:
    // stmt:       root of the program to compile.
    // moduleName: output file name; must not contain a directory part.
    CodeGen(Stmt^ stmt, String^ moduleName)
    {
        if (Path::GetFileName(moduleName) != moduleName)
            throw gcnew Exception("can only output into current directory");

        AssemblyName^ name = gcnew AssemblyName(Path::GetFileNameWithoutExtension(moduleName));
        AssemblyBuilder^ asmb = AppDomain::CurrentDomain->DefineDynamicAssembly(name, AssemblyBuilderAccess::Save);
        ModuleBuilder^ modb = asmb->DefineDynamicModule(moduleName);
        TypeBuilder^ typeBuilder = modb->DefineType("Foo");
        MethodBuilder^ methb = typeBuilder->DefineMethod("Main", MethodAttributes::Static, void::typeid, Type::EmptyTypes);

        il = methb->GetILGenerator();
        symbolTable = gcnew Dictionary<String^, LocalBuilder^>();

        GenStmt(stmt);
        il->Emit(OpCodes::Ret);

        typeBuilder->CreateType();
        modb->CreateGlobalFunctions();
        asmb->SetEntryPoint(methb);
        asmb->Save(moduleName);

        // Generation state is not needed once the assembly has been saved.
        symbolTable = nullptr;
        il = nullptr;
    }

private:
    // Emits IL for one statement (recursively for sequences).
    void GenStmt(Stmt^ stmt)
    {
        // A null statement emits nothing, as in the original code.
        if (stmt == nullptr)
            return;

        Sequence^ seq = dynamic_cast<Sequence^>(stmt);
        if (seq != nullptr)
        {
            GenStmt(seq->First);
            GenStmt(seq->Second);
            return;
        }

        Print^ print = dynamic_cast<Print^>(stmt);
        if (print != nullptr)
        {
            // Console.WriteLine(string): the operand is coerced to String.
            GenExpr(print->Expr, String::typeid);
            il->Emit(OpCodes::Call, System::Console::typeid->GetMethod("WriteLine", gcnew array<System::Type^> {String::typeid}));
            return;
        }

        DeclareVar^ declare = dynamic_cast<DeclareVar^>(stmt);
        if (declare != nullptr)
        {
            // The local takes the static type of its initializer; the
            // initial store is then generated as an ordinary assignment.
            symbolTable[declare->Ident] = il->DeclareLocal(TypeOfExpr(declare->Expr));
            Assign^ init = gcnew Assign();
            init->Ident = declare->Ident;
            init->Expr = declare->Expr;
            GenStmt(init);
            return;
        }

        Assign^ assign = dynamic_cast<Assign^>(stmt);
        if (assign != nullptr)
        {
            Type^ valueType = TypeOfExpr(assign->Expr);
            GenExpr(assign->Expr, valueType);
            Store(assign->Ident, valueType);
            return;
        }

        // Report an unsupported node instead of silently emitting nothing.
        throw gcnew Exception("don't know how to gen a " + stmt->GetType()->Name);
    }

    // Emits a store of the value on top of the stack into variable `name`.
    // Throws if the variable is undeclared or its type differs from `type`.
    void Store(String^ name, Type^ type)
    {
        if (!symbolTable->ContainsKey(name))
            throw gcnew Exception("undeclared variable '" + name + "'");

        LocalBuilder^ locb = symbolTable[name];
        if (locb->LocalType != type)
            throw gcnew Exception("'" + name + "' is of type " + locb->LocalType->Name + " but attempted to store value of type " + type->Name);

        il->Emit(OpCodes::Stloc, locb);
    }

    // Emits IL that leaves the value of `expr` on the stack as
    // `expectedType`. The only implicit conversion is int -> String.
    void GenExpr(Expr^ expr, Type^ expectedType)
    {
        Type^ deliveredType;

        StringLiteral^ str = dynamic_cast<StringLiteral^>(expr);
        IntLiteral^ num = dynamic_cast<IntLiteral^>(expr);
        Variable^ var = dynamic_cast<Variable^>(expr);

        if (str != nullptr)
        {
            deliveredType = String::typeid;
            il->Emit(OpCodes::Ldstr, str->Value);
        }
        else if (num != nullptr)
        {
            // This branch was missing: an IntLiteral left deliveredType
            // null and the error path below dereferenced it.
            deliveredType = int::typeid;
            il->Emit(OpCodes::Ldc_I4, num->Value);
        }
        else if (var != nullptr)
        {
            if (!symbolTable->ContainsKey(var->Ident))
                throw gcnew Exception("undeclared variable '" + var->Ident + "'");
            LocalBuilder^ locb = symbolTable[var->Ident];
            deliveredType = locb->LocalType;
            il->Emit(OpCodes::Ldloc, locb);
        }
        else
            throw gcnew Exception("don't know how to generate " + expr->GetType()->Name);

        if (deliveredType != expectedType)
        {
            if (deliveredType == int::typeid &&
                expectedType == String::typeid)
            {
                // Box the int and call Object.ToString() on it.
                il->Emit(OpCodes::Box, int::typeid);
                il->Emit(OpCodes::Callvirt, Object::typeid->GetMethod("ToString"));
            }
            else
                throw gcnew Exception("Can't coerce a " + deliveredType->Name + " to a " + expectedType->Name);
        }
    }

    // Returns the static type of `expr`: String or int for literals, the
    // declared local's type for variables. Throws for undeclared variables
    // and for node kinds it does not know.
    Type^ TypeOfExpr(Expr^ expr)
    {
        if (dynamic_cast<StringLiteral^>(expr) != nullptr)
            return String::typeid;

        if (dynamic_cast<IntLiteral^>(expr) != nullptr)
            return int::typeid;

        Variable^ var = dynamic_cast<Variable^>(expr);
        if (var != nullptr)
        {
            if (!symbolTable->ContainsKey(var->Ident))
                throw gcnew Exception("undeclared variable '" + var->Ident + "'");
            return symbolTable[var->Ident]->LocalType;
        }

        throw gcnew Exception("don't know how to calculate the type of " + expr->GetType()->Name);
    }
};
// Entry point. Every command-line argument is a source file; their tokens
// are concatenated in order and compiled into "<first-file-name>.exe" in the
// current directory.
// Returns 0 on success and 1 on a usage error or any compilation failure
// (the failure message is written to stderr).
int main(array<System::String^ >^ args)
{
    if (args->Length < 1)
    {
        Console::Error->WriteLine("usage: <compiler> <source-file> [<source-file> ...]");
        return 1;
    }
    try
    {
        // One shared token list. The original guarded the extra-file loop
        // with `Length > 2`, which skipped the second file when exactly two
        // were given.
        IList<Object^>^ tokens = gcnew List<Object^>();
        for (int i = 0; i < args->Length; i++)
        {
            TextReader^ input = File::OpenText(args[i]);
            try
            {
                // The Scanner constructor does all the work; the instance
                // itself is not needed afterwards.
                gcnew Scanner(input, tokens);
            }
            finally
            {
                delete input; // disposes the reader and closes the file
            }
        }
        Parser^ parser = gcnew Parser(tokens);
        // The CodeGen constructor emits and saves the assembly.
        gcnew CodeGen(parser->Result, Path::GetFileNameWithoutExtension(args[0]) + ".exe");
    }
    catch (Exception^ ex)
    {
        Console::Error->WriteLine(ex->Message);
        return 1;
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment