Last active
March 11, 2020 08:20
-
-
Save dbwodlf3/4ea9e3de9cb3710d0607558cfc8d4e69 to your computer and use it in GitHub Desktop.
llvm_cookbook_ch2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cmake_minimum_required(VERSION 3.4.3)
project(toy)

# Default location of LLVMConfig.cmake. Only applied when the caller has not
# already chosen one (e.g. with -DLLVM_DIR=<path>), so the path can be
# overridden without editing this file.
if(NOT DEFINED LLVM_DIR)
  set(LLVM_DIR /llvm/lib/cmake/llvm)
endif()

find_package(LLVM REQUIRED CONFIG)
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")

include_directories(${LLVM_INCLUDE_DIRS})
add_definitions(${LLVM_DEFINITIONS})

# Now build our tools
add_executable(toy toy.cpp)

# Find the libraries that correspond to the LLVM components
# that we wish to use. This call must run: it is what fills in llvm_libs,
# which would otherwise expand to nothing on the link line below.
llvm_map_components_to_libnames(llvm_libs support core irreader)

# Link against LLVM libraries
target_link_libraries(toy ${llvm_libs})
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "llvm/IR/Verifier.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include <cctype>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <map>
#include <string>
#include <vector>
// Token codes produced by get_token(). They are negative so they can never be
// confused with a single-character token, which get_token() returns as the
// character's own non-negative code.
enum Token_Type {
  EOF_TOKEN = -1,
  NUMERIC_TOKEN = -2,
  IDENTIFIER_TOKEN = -3,
  PARAN_TOKEN = -4,  // declared for completeness; get_token() never returns it
  DEF_TOKEN = -5
};

// Input stream read by the lexer; main() opens it.
FILE *file;
// Text of the most recent IDENTIFIER_TOKEN / DEF_TOKEN.
static std::string Identifier_string;
// Value of the most recent NUMERIC_TOKEN.
static int Numeric_Val;

// Lexer: reads the next token from `file`.
//
// Returns one of the Token_Type codes, or, for any other character, the
// character's own code (e.g. '(' or '+'). Identifier text is left in
// Identifier_string and numeric values in Numeric_Val. Text from '#' to the
// end of the line is skipped as a comment.
static int get_token() {
  // One character of lookahead, carried over between calls.
  static int LastChar = ' ';

  // Nothing can be read without an open stream; report end of input instead
  // of handing a null pointer to fgetc().
  if (file == nullptr)
    return EOF_TOKEN;

  while (isspace(LastChar))
    LastChar = fgetc(file);

  // identifier: [a-zA-Z][a-zA-Z0-9]*
  if (isalpha(LastChar)) {
    Identifier_string = static_cast<char>(LastChar);
    while (isalnum((LastChar = fgetc(file))))
      Identifier_string += static_cast<char>(LastChar);
    if (Identifier_string == "def")
      return DEF_TOKEN;
    return IDENTIFIER_TOKEN;
  }

  // number: [0-9]+
  if (isdigit(LastChar)) {
    std::string NumStr;
    do {
      NumStr += static_cast<char>(LastChar);
      LastChar = fgetc(file);
    } while (isdigit(LastChar));
    // Parse as a base-10 integer. strtol saturates at LONG_MAX on overflow;
    // clamp once more so the value always fits in Numeric_Val.
    const long Parsed = strtol(NumStr.c_str(), nullptr, 10);
    Numeric_Val = Parsed > INT_MAX ? INT_MAX : static_cast<int>(Parsed);
    return NUMERIC_TOKEN;
  }

  // comment: skip to the end of the line, then lex whatever follows.
  if (LastChar == '#') {
    do
      LastChar = fgetc(file);
    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
    if (LastChar != EOF)
      return get_token();
  }

  if (LastChar == EOF)
    return EOF_TOKEN;

  // Any other character is its own token; advance the lookahead past it.
  int ThisChar = LastChar;
  LastChar = fgetc(file);
  return ThisChar;
}
// get_token above is the lexer.
| // Token의 파싱을 위한 AST | |
| namespace { | |
| class BaseAST { | |
| public: | |
| virtual ~BaseAST() {} | |
| virtual llvm::Value *Codegen() = 0; | |
| }; | |
| class NumericAST : public BaseAST{ | |
| int numeric_val; | |
| public: | |
| NumericAST(int val) : numeric_val(val){} | |
| virtual llvm::Value *Codegen(); | |
| }; | |
| class VariableAST : public BaseAST { | |
| std::string Var_Name; | |
| public: | |
| VariableAST(const std::string &name) : Var_Name(name) {} | |
| virtual llvm::Value *Codegen(); | |
| }; | |
| class BinaryAST : public BaseAST { | |
| std::string Bin_Operator; | |
| BaseAST *LHS, *RHS; | |
| public: | |
| BinaryAST(std::string op, BaseAST *lhs, BaseAST *rhs) : Bin_Operator(op), LHS(lhs), RHS(rhs){} | |
| virtual llvm::Value *Codegen(); | |
| }; | |
| class FunctionCallAST : public BaseAST { | |
| std::string Function_Callee; | |
| std::vector<BaseAST*> Function_Arguments; | |
| public: | |
| FunctionCallAST(const std::string &callee, std::vector<BaseAST*> &args) | |
| : Function_Callee(callee), Function_Arguments(args) {} | |
| virtual llvm::Value *Codegen(); | |
| }; | |
| class FunctionDeclAST { | |
| std::string Func_Name; | |
| std::vector<std::string> Arguments; | |
| public: | |
| FunctionDeclAST(const std::string &name, const std::vector<std::string> &args) : | |
| Func_Name(name), Arguments(args) {}; | |
| llvm::Function *Codegen(); | |
| }; | |
| class FunctionDefnAST { | |
| FunctionDeclAST *Func_Decl; | |
| BaseAST* Body; | |
| public: | |
| FunctionDefnAST(FunctionDeclAST *proto, BaseAST *body) : | |
| Func_Decl(proto), Body(body) {} | |
| llvm::Function* Codegen(); | |
| }; | |
| } | |
| //여기까지가 AST 선언부. | |
| //여기서부터는 Parsing | |
| static int Current_token; | |
| static int next_token() { | |
| return Current_token = get_token(); | |
| } | |
| static std::map<char, int> Operator_Precedence; | |
| static int getBinOpPrecedence(){ | |
| if(!isascii(Current_token)) | |
| return -1; | |
| int TokPrec = Operator_Precedence[Current_token]; | |
| if(TokPrec <= 0) return -1; | |
| return TokPrec; | |
| } | |
| static BaseAST* expression_parser(); | |
| static BaseAST* identifier_parser(){ | |
| std::string IdName = Identifier_string; | |
| next_token(); | |
| if(Current_token !='(') | |
| return new VariableAST(IdName); | |
| next_token(); | |
| std::vector<BaseAST*> Args; | |
| if(Current_token !=')'){ | |
| while(1) { | |
| BaseAST* Arg = expression_parser(); | |
| if(!Arg) return 0; | |
| Args.push_back(Arg); | |
| if(Current_token == ')') break; | |
| if(Current_token != ',') | |
| return 0; | |
| next_token(); | |
| } | |
| } | |
| next_token(); | |
| return new FunctionCallAST(IdName, Args); | |
| } | |
| static BaseAST *numeric_parser() { | |
| BaseAST *Result = new NumericAST(Numeric_Val); | |
| next_token(); | |
| return Result; | |
| } | |
| static BaseAST* paran_parser() { | |
| next_token(); | |
| BaseAST* V = expression_parser(); | |
| if (!V) return 0; | |
| if(Current_token != ')') | |
| return 0; | |
| return V; | |
| } | |
| static BaseAST* Base_Parser() { | |
| switch (Current_token){ | |
| default: return 0; | |
| case IDENTIFIER_TOKEN : return identifier_parser(); | |
| case NUMERIC_TOKEN : return numeric_parser(); | |
| case '(' : return paran_parser(); | |
| } | |
| } | |
| static BaseAST* binary_op_parser(int Old_Prec, BaseAST *LHS){ | |
| while(1){ | |
| int Operator_Prec = getBinOpPrecedence(); | |
| if(Operator_Prec < Old_Prec) | |
| return LHS; | |
| int BinOp = Current_token; | |
| next_token(); | |
| BaseAST* RHS = Base_Parser(); | |
| if(!RHS) return 0; | |
| int Next_Prec = getBinOpPrecedence(); | |
| if(Operator_Prec < Next_Prec) { | |
| RHS = binary_op_parser(Operator_Prec+1, RHS); | |
| if(RHS == 0) return 0; | |
| } | |
| LHS = new BinaryAST(std::to_string(BinOp), LHS, RHS); | |
| } | |
| } | |
| static BaseAST* expression_parser() { | |
| BaseAST* LHS = Base_Parser(); | |
| if(!LHS) return 0; | |
| return binary_op_parser(0, LHS); | |
| } | |
| static FunctionDeclAST* func_decl_parser(){ | |
| if(Current_token != IDENTIFIER_TOKEN) | |
| return 0; | |
| std::string FnName = Identifier_string; | |
| next_token(); | |
| if(Current_token != '(') | |
| return 0; | |
| std::vector<std::string> Function_Argument_Names; | |
| while(next_token() == IDENTIFIER_TOKEN) | |
| Function_Argument_Names.push_back(Identifier_string); | |
| if(Current_token != ')') | |
| return 0; | |
| next_token(); | |
| return new FunctionDeclAST(FnName, Function_Argument_Names); | |
| } | |
| static FunctionDefnAST* func_defn_parser(){ | |
| next_token(); | |
| FunctionDeclAST *Decl = func_decl_parser(); | |
| if(Decl == 0) return 0; | |
| if(BaseAST* Body = expression_parser()) | |
| return new FunctionDefnAST(Decl, Body); | |
| return 0; | |
| } | |
| static FunctionDefnAST* top_level_parser() { | |
| if(BaseAST* E = expression_parser()) { | |
| FunctionDeclAST* Func_Decl = new FunctionDeclAST("", std::vector<std::string>()); | |
| return new FunctionDefnAST(Func_Decl, E); | |
| } | |
| return 0; | |
| } | |
| static void init_precedence() { | |
| Operator_Precedence['-'] = 1; | |
| Operator_Precedence['+'] = 2; | |
| Operator_Precedence['/'] = 3; | |
| Operator_Precedence['*'] = 4; | |
| } | |
| // 여기서부터는 IR Code 생성하기. | |
| static llvm::LLVMContext TheContext; | |
| static llvm::Module *Module_Ob; | |
| static llvm::IRBuilder<> Builder(TheContext); | |
| static std::map<std::string, llvm::Value*> Named_Values; | |
| llvm::Value *NumericAST::Codegen(){ | |
| return llvm::ConstantInt::get(llvm::Type::getInt32Ty(TheContext), numeric_val); | |
| } | |
| llvm::Value *VariableAST::Codegen() { | |
| llvm::Value *V = Named_Values[Var_Name]; | |
| return V ? V : 0; | |
| } | |
| llvm::Value *BinaryAST::Codegen() { | |
| llvm::Value *L = LHS->Codegen(); | |
| llvm::Value *R = RHS->Codegen(); | |
| if(L == 0 || R == 0) return 0; | |
| switch(atoi(Bin_Operator.c_str())) { | |
| case '+' : return Builder.CreateAdd(L, R, "addtmp"); | |
| case '-' : return Builder.CreateSub(L, R, "subtmp"); | |
| case '*' : return Builder.CreateMul(L, R, "multmp"); | |
| case '/' : return Builder.CreateUDiv(L, R, "divtmp"); | |
| default : return 0; | |
| } | |
| } | |
| llvm::Value* FunctionCallAST::Codegen(){ | |
| llvm::Function* CalleeF = Module_Ob->getFunction(Function_Callee); | |
| std::vector<llvm::Value*>ArgsV; | |
| for(unsigned i=0, e=Function_Arguments.size(); i !=e; ++i){ | |
| ArgsV.push_back(Function_Arguments[i]->Codegen()); | |
| if(ArgsV.back() == 0) return 0; | |
| } | |
| return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); | |
| } | |
| llvm::Function *FunctionDeclAST::Codegen() { | |
| std::vector<llvm::Type*>Integers(Arguments.size(), llvm::Type::getInt32Ty(TheContext)); | |
| llvm::FunctionType* FT = llvm::FunctionType::get(llvm::Type::getInt32Ty(TheContext), Integers, false); | |
| llvm::Function* F = llvm::Function::Create(FT, llvm::Function::ExternalLinkage, Func_Name, Module_Ob); | |
| if(F->getName() != Func_Name){ | |
| F->eraseFromParent(); | |
| F = Module_Ob->getFunction(Func_Name); | |
| if(!F->empty()) return 0; | |
| if(F->arg_size() != Arguments.size()) return 0; | |
| } | |
| unsigned Idx = 0; | |
| for(llvm::Function::arg_iterator Arg_It = F->arg_begin(); Idx != Arguments.size(); ++Arg_It, ++Idx){ | |
| Arg_It->setName(Arguments[Idx]); | |
| Named_Values[Arguments[Idx]] = Arg_It; | |
| } | |
| return F; | |
| } | |
| llvm::Function *FunctionDefnAST::Codegen(){ | |
| Named_Values.clear(); | |
| llvm::Function *TheFunction = Func_Decl->Codegen(); | |
| if(TheFunction == 0) return 0; | |
| llvm::BasicBlock* BB = llvm::BasicBlock::Create(TheContext, "entry", TheFunction); | |
| Builder.SetInsertPoint(BB); | |
| if(llvm::Value *RetVal = Body->Codegen()){ | |
| Builder.CreateRet(RetVal); | |
| verifyFunction(*TheFunction); | |
| return TheFunction; | |
| } | |
| TheFunction->eraseFromParent(); | |
| return 0; | |
| } | |
| static void HandleDefn() { | |
| if (FunctionDefnAST *F = func_defn_parser()){ | |
| if(llvm::Function* LF = F->Codegen()){} | |
| } else { | |
| next_token(); | |
| } | |
| } | |
| static void HandleTopExpression() { | |
| if(FunctionDefnAST *F = top_level_parser()){ | |
| if(llvm::Function *LF = F->Codegen()){} | |
| } else { | |
| next_token(); | |
| } | |
| } | |
| //여기까지가 코드 생성. | |
| //여기서부터는 Compiler Driver. | |
| static void Driver() { | |
| while(1) { | |
| switch(Current_token) { | |
| case EOF_TOKEN : return; | |
| case ';' : next_token(); break; | |
| case DEF_TOKEN : HandleDefn(); break; | |
| default : HandleTopExpression(); break; | |
| } | |
| } | |
| } | |
// C-linkage helper: writes X, converted to a char, to standard output.
// Always returns 0.
extern "C" double putchard(double X) {
  const char Ch = static_cast<char>(X);
  putchar(Ch);
  return 0.0;
}
| int main(int argc, char* argv[]){ | |
| llvm::LLVMContext &Context = TheContext; | |
| init_precedence(); | |
| file = fopen(argv[1], "r"); | |
| if(file == 0) { | |
| printf("Could not open file\n"); | |
| } | |
| next_token(); | |
| Module_Ob = new llvm::Module("my Compiler", Context); | |
| Driver(); | |
| Module_Ob->dump(); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment