Last active
October 15, 2016 10:11
-
-
Save kmalloc/9cc142fb6a4f1d6cf0c3a9c98ea3cb77 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h>
#include <string.h>

#include <cctype>
#include <iostream>
#include <map>
#include <string>
#include <vector>
// Numeric limits for the analyzer. Neither constant is referenced in the
// visible part of this file, so their meaning cannot be confirmed from here.
// NOTE(review): OP_MAX presumably bounds an operator count and SCOPE_MAX a
// scope nesting depth -- confirm against the code that uses them.
const int OP_MAX = 9;
const int SCOPE_MAX = 3;
// Lexical category attached to every Token.
// Enumerator order is preserved, so the underlying integer values are
// unchanged.
enum TokenType {
    TT_None,           // default for a freshly constructed Token
    TT_Eof,            // end of the input buffer
    TT_Semi,           // ';'
    TT_Colon,          // ':'
    TT_Id,             // identifier (also the fallback for alphabetic words)
    TT_Left_Paren,     // '('
    TT_Right_Paren,    // ')'
    TT_Left_Brace,     // '{'
    TT_Right_Brace,    // '}'
    TT_Class,          // the "class" keyword; the tokenizer reports "struct" as TT_Class too
    TT_Struct,         // not produced by the tokenizer in this file
    TT_Enum,           // not produced by the tokenizer in this file
    TT_Union,          // not produced by the tokenizer in this file
    TT_Equal,          // "=="
    TT_Assign,         // '='
    TT_Dot,            // '.'
    TT_Arrow,          // "->"
    TT_Left_Bracket,   // '<'
    TT_Right_Bracket,  // '>'
    TT_Op,             // any other non-alphabetic token: +, -, *, /, etc.
    TT_If,
    TT_Else,
    TT_For,
    TT_While,
    TT_Do,
    TT_Public,
    TT_Private,
    TT_Protected,
};
// Classification of a statement (Exp).
// The statement reader in this file only ever assigns ET_Scope, ET_NOP,
// ET_Decl, ET_FuncImpl, ET_Assign and ET_ClassImpl; the remaining values are
// declared but not assigned anywhere in the visible code.
// Enumerator order is preserved, so the underlying integer values are
// unchanged.
enum ExpType {
    ET_None,      // nothing classified yet (value set by Exp::reset)
    ET_Scope,     // statement that starts with '{'
    ET_Decl,      // variable declaration
    ET_FuncDecl,  // function declaration
    ET_FuncImpl,  // function implementation
    ET_Call,
    ET_Assign,
    ET_Bin_Calc,
    ET_NOP,       // statement of at most two tokens, e.g. "var;"
    ET_If,
    ET_For,
    ET_While,
    ET_Else,
    ET_Do,
    ET_Op,
    ET_Label,
    ET_ClassImpl
};
struct Token { | |
Token(): type_(TT_None), s_(NULL), e_(NULL) {} | |
TokenType type_; | |
const char* s_, *e_; | |
}; | |
struct Exp { | |
void reset() { exp_.clear(); type_ = ET_None; } | |
int type_; | |
std::vector<Token> exp_; | |
}; | |
// True when `c` may appear inside an identifier: a letter, a digit or '_'.
// Written as a function instead of a macro so the argument is evaluated once,
// and converted to unsigned char first because passing a negative char to the
// <cctype> classification functions is undefined behaviour.
inline bool is_id_ch(char c) {
    const unsigned char uc = static_cast<unsigned char>(c);
    return std::isalpha(uc) || uc == '_' || std::isdigit(uc);
}
Token tokenize(const char* &str) { | |
while (*str && std::isspace(*str)) ++str; | |
Token t; | |
t.s_ = str; | |
if (*str) ++str; | |
while (*str && is_id_ch(*str)) ++str; | |
t.e_ = str; | |
t.type_ = TT_Id; | |
char c = *t.s_; | |
if (*t.s_ == 0) t.type_ = TT_Eof; | |
else if (c == '='){ | |
if (*(t.s_ + 1) != '='){ | |
t.type_ = TT_Assign; | |
} else { | |
t.e_ = t.s_ + 2; | |
t.type_ = TT_Equal; | |
} | |
} | |
else if (!memcmp(t.s_, "->", 2)) t.type_ = TT_Arrow; | |
else if (c == '.') t.type_ = TT_Dot; | |
else if (c == ';') t.type_ = TT_Semi; | |
else if (c == '(') t.type_ = TT_Left_Paren; | |
else if (c == ')') t.type_ = TT_Right_Paren; | |
else if (c == '{') t.type_ = TT_Left_Brace; | |
else if (c == '}') t.type_ = TT_Right_Brace; | |
else if (c == ':') t.type_ = TT_Colon; | |
else if (c == '<') t.type_ = TT_Left_Bracket; | |
else if (c == '>') t.type_ = TT_Right_Bracket; | |
else if (!memcmp(t.s_, "class", 5) || !memcmp(t.s_, "struct", 6)) { | |
t.type_ = TT_Class; | |
} else if (!memcmp(t.s_, "if", 2)){ | |
t.type_ = TT_If; | |
} else if (memcmp(t.s_, "for", 3) == 0){ | |
t.type_ = TT_For; | |
} else if (memcmp(t.s_, "while", 5) == 0){ | |
t.type_ = TT_While; | |
} else if (memcmp(t.s_, "do", 2) == 0){ | |
t.type_ = TT_Do; | |
} else if (memcmp(t.s_, "else", 4) == 0){ | |
t.type_ = TT_Else; | |
} else if (memcmp(t.s_, "public", 6) == 0){ | |
t.type_ = TT_Public; | |
tokenize(str); // consume ":" | |
} else if (memcmp(t.s_, "private", 7) == 0){ | |
t.type_ = TT_Private; | |
tokenize(str); // consume ":" | |
} else if (memcmp(t.s_, "protected", 9) == 0){ | |
t.type_ = TT_Protected; | |
tokenize(str); // consume ":" | |
} else if (memcmp(t.s_, "unsigned", 8) == 0) { | |
Token t2 = tokenize(str); | |
t.e_ = t2.e_; | |
} else if (!std::isalpha(c)) { | |
t.type_ = TT_Op; | |
} | |
return t; | |
} | |
void GetStatement(const char* &str, Exp& exp) { | |
Token t; | |
int left_brace = 0; | |
while (true) { | |
t = tokenize(str); | |
if (t.type_ == TT_Eof) { | |
break; | |
} else if (t.type_ == TT_Left_Brace) { | |
if (exp.exp_.empty()) { | |
exp.exp_.push_back(t); | |
break; | |
} | |
left_brace++; | |
} else if (t.type_ == TT_Right_Brace) { | |
if (left_brace > 0) { | |
left_brace--; | |
} | |
if (left_brace == 0) { | |
exp.exp_.push_back(t); | |
break; | |
} | |
} else if (t.type_ == TT_Semi) { | |
if (!exp.exp_.empty() && !left_brace) break; | |
} | |
exp.exp_.push_back(t); | |
t.type_ = TT_None; | |
} | |
size_t token_num = exp.exp_.size(); | |
if (exp.exp_[0].type_ == TT_Left_Brace) { | |
exp.type_ = ET_Scope; | |
} else if (token_num <= 2) { | |
exp.type_ = ET_NOP; // eg, "var;" | |
} else if (exp.exp_[0].type_ == TT_Id && exp.exp_[1].type_ == TT_Id) { | |
if (exp.exp_[2].type_ == TT_Left_Paren) { | |
if (t.type_ == TT_Right_Brace) exp.type_ = ET_FuncImpl; | |
else exp.type_ = ET_Decl; // ET_FuncDecl; | |
} else { | |
exp.type_ = ET_Decl; | |
} | |
} else if (exp.exp_[0].type_ == TT_Id) { | |
if (exp.exp_[1].type_ == TT_Assign) { | |
exp.type_ = ET_Assign; | |
} else if (exp.exp_[1].type_ == TT_Left_Bracket) { | |
// template | |
// TODO | |
assert(0); | |
}else { | |
assert(0); | |
} | |
} else if (exp.exp_[0].type_ == TT_Class) { | |
if (exp.exp_[3].type_ == TT_Colon || exp.exp_[3].type_ == TT_Left_Brace) { | |
exp.type_ = ET_ClassImpl; | |
} else { | |
std::swap(exp.exp_[0], exp.exp_[exp.exp_.size()-1]); | |
exp.exp_.erase(exp.exp_.begin() + exp.exp_.size() - 1); | |
exp.type_ = ET_Decl; | |
} | |
} else { | |
assert(0); | |
} | |
} | |
int token2type(const Token& t) { | |
const char* s = t.s_; | |
if (memcmp(s, "int", 3) == 0) { | |
return 1; | |
} else if (memcmp(s, "unsigned int", 12) == 0) { | |
return 2; | |
} else if (memcmp(s, "short", 5) == 0) { | |
return 3; | |
} else if (memcmp(s, "unsigned short", 15) == 0) { | |
return 4; | |
} else if (memcmp(s, "char", 4) == 0) { | |
return 5; | |
} else if (memcmp(s, "unsigned char", 15) == 0) { | |
return 6; | |
} else if (memcmp(s, "float", 5) == 0) { | |
return 7; | |
} else if (memcmp(s, "double", 6) == 0) { | |
return 8; | |
} else { | |
static int s_user_type = 512; | |
static std::map<std::string, int> s_type_m; | |
const std::string str(t.s_, t.e_); | |
std::map<std::string, int>::const_iterator it = s_type_m.find(str); | |
if (it != s_type_m.end()) return it->second; | |
s_type_m[str] = s_user_type++; | |
return s_user_type - 1; | |
} | |
} | |
void analyze(const char* buff, std::map<std::string, int>& func_call) { | |
Exp exp; | |
exp.exp_.reserve(32); | |
while (*buff) { | |
exp.reset(); | |
GetStatement(buff, exp); | |
switch (exp.type_) { | |
case ET_FuncDecl: | |
break; | |
case ET_Decl: | |
{ | |
Item item; | |
item.type_ = token2type(exp.exp_[0]); | |
item.name_ = std::string(exp.exp_[1].s_, exp.exp_[1].e_); | |
} | |
break; | |
case ET_FuncImpl: | |
{ | |
std::cout << "func impl\n"; | |
ParseFunction(exp.exp_); | |
} | |
break; | |
case ET_Assign: | |
break; | |
case ET_ClassImpl: | |
break; | |
} | |
} | |
} | |
// Demo driver: analyzes a small hard-coded snippet and prints every
// "name:count" pair that analyze() left in the map.
int main() {
    const char* txt = "void foo() { voo(); }\nint main() { int a = 3; b = 4; foo();foo(); unsigned int v = 23 + a; voo(); return a + b; }";
    std::map<std::string, int> func;
    analyze(txt, func);

    typedef std::map<std::string, int>::const_iterator FuncIter;
    for (FuncIter it = func.begin(); it != func.end(); ++it) {
        // '\n' instead of std::endl: no need to flush after every line.
        std::cout << it->first << ":" << it->second << '\n';
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment