Skip to content

Instantly share code, notes, and snippets.

@kmalloc
Last active October 15, 2016 10:11
Show Gist options
  • Save kmalloc/9cc142fb6a4f1d6cf0c3a9c98ea3cb77 to your computer and use it in GitHub Desktop.
Save kmalloc/9cc142fb6a4f1d6cf0c3a9c98ea3cb77 to your computer and use it in GitHub Desktop.
#include <assert.h>
#include <string.h>

#include <cctype>
#include <iostream>
#include <map>
#include <string>
#include <vector>
// Numeric limits. Neither constant is referenced in the visible part of this
// file; NOTE(review): presumably upper bounds on operator / scope counts --
// confirm against the code that uses them.
static const int OP_MAX = 9;
static const int SCOPE_MAX = 3;
// Classification assigned to each lexical token.
// Enumerator order defines the numeric values; do not reorder.
enum TokenType {
    TT_None = 0,        // default value of a freshly constructed token
    TT_Eof,             // end of the input buffer

    // punctuation
    TT_Semi,            // ;
    TT_Colon,           // :
    TT_Id,              // identifier-like word
    TT_Left_Paren,      // (
    TT_Right_Paren,     // )
    TT_Left_Brace,      // {
    TT_Right_Brace,     // }

    // aggregate keywords
    TT_Class,
    TT_Struct,
    TT_Enum,
    TT_Union,

    // operators
    TT_Equal,           // ==
    TT_Assign,          // =
    TT_Dot,             // .
    TT_Arrow,           // ->
    TT_Left_Bracket,    // <
    TT_Right_Bracket,   // >
    TT_Op,              // any other operator: +, -, *, /, etc.

    // control-flow keywords
    TT_If,
    TT_Else,
    TT_For,
    TT_While,
    TT_Do,

    // access specifiers
    TT_Public,
    TT_Private,
    TT_Protected
};
// Classification assigned to a whole statement (stored in Exp::type_).
// Enumerator order defines the numeric values; do not reorder.
enum ExpType {
    ET_None = 0,    // not classified yet
    ET_Scope,       // statement that starts with '{'
    ET_Decl,        // variable declaration
    ET_FuncDecl,    // function declaration
    ET_FuncImpl,    // function implementation
    ET_Call,
    ET_Assign,
    ET_Bin_Calc,
    ET_NOP,         // too short to classify, e.g. "var;"
    ET_If,
    ET_For,
    ET_While,
    ET_Else,
    ET_Do,
    ET_Op,
    ET_Label,
    ET_ClassImpl
};
// One lexical token: its classification plus the half-open character range
// [s_, e_) it occupies in the scanned buffer. The token does not own the text.
struct Token {
    TokenType type_;    // kind of token
    const char* s_;     // first character of the token
    const char* e_;     // one past the last character of the token

    // A default token is unclassified and points at no text.
    Token()
        : type_(TT_None),
          s_(NULL),
          e_(NULL) {
    }
};
// One statement: the tokens it consists of plus its classification.
struct Exp {
    // Start out unclassified so type_ is never read uninitialized, even when
    // reset() has not been called yet.
    Exp() : type_(ET_None) {}

    // Drops all collected tokens and marks the statement as unclassified.
    void reset() { exp_.clear(); type_ = ET_None; }

    int type_;                  // an ExpType value
    std::vector<Token> exp_;    // tokens in source order
};
// Returns true when `ch` can be part of an identifier-like word: a letter,
// a digit or an underscore.
//
// This is a function rather than a macro so that the argument is evaluated
// exactly once (safe with `*p++`) and so that it is converted to
// unsigned char before reaching the <cctype> classifiers, which have
// undefined behavior for negative values other than EOF.
inline bool is_id_ch(int ch) {
    const unsigned char uc = static_cast<unsigned char>(ch);
    return std::isalpha(uc) || uc == '_' || std::isdigit(uc);
}
// Returns true when the `len` characters starting at `s` spell exactly `kw`.
// Comparing the length first keeps memcmp inside the token's own text.
static bool token_text_is(const char* s, size_t len, const char* kw) {
    return strlen(kw) == len && memcmp(s, kw, len) == 0;
}

// Extracts the next token from `str` and advances `str` past it.
//
// Leading whitespace is skipped. The returned token's [s_, e_) range points
// into the caller's buffer. At the end of the buffer a TT_Eof token with an
// empty range is returned and `str` is left on the terminating NUL.
//
// Words (runs of letters, digits and '_'):
//   - "class" and "struct" both yield TT_Class;
//   - "if", "for", "while", "do", "else" yield their keyword types;
//   - "public", "private", "protected" yield their types and also swallow a
//     directly following ':';
//   - "unsigned" followed by int/short/char/long is merged with that word
//     into a single TT_Id token (e.g. "unsigned int");
//   - any other word starting with a letter or '_' is TT_Id;
//   - words starting with a digit keep the historical TT_Op classification,
//     since there is no dedicated token type for numeric literals.
// Keywords are matched on the whole word, so "double" is not mistaken for
// "do" and "format" is not mistaken for "for".
//
// Punctuation is one character long, except "==" (TT_Equal) and "->"
// (TT_Arrow), which are consumed as two-character tokens. Punctuation without
// a dedicated type is TT_Op.
Token tokenize(const char* &str) {
    while (*str && std::isspace(static_cast<unsigned char>(*str))) ++str;

    Token t;
    t.s_ = str;
    t.e_ = str;

    const char c = *str;
    if (c == 0) {
        t.type_ = TT_Eof;
        return t;
    }

    if (is_id_ch(c)) {
        // Word: consume the whole run of identifier characters.
        while (*str && is_id_ch(*str)) ++str;
        t.e_ = str;
        const size_t len = static_cast<size_t>(t.e_ - t.s_);

        static const struct {
            const char* text;
            TokenType type;
        } kKeywords[] = {
            {"class", TT_Class},
            {"struct", TT_Class},
            {"if", TT_If},
            {"for", TT_For},
            {"while", TT_While},
            {"do", TT_Do},
            {"else", TT_Else},
            {"public", TT_Public},
            {"private", TT_Private},
            {"protected", TT_Protected},
        };
        const size_t keyword_count = sizeof(kKeywords) / sizeof(kKeywords[0]);

        for (size_t i = 0; i < keyword_count; ++i) {
            if (!token_text_is(t.s_, len, kKeywords[i].text)) continue;
            t.type_ = kKeywords[i].type;
            if (t.type_ == TT_Public || t.type_ == TT_Private || t.type_ == TT_Protected) {
                // Swallow the ':' of "public:" but leave anything else alone,
                // e.g. the base class name in "class A : public B".
                const char* ahead = str;
                if (tokenize(ahead).type_ == TT_Colon) str = ahead;
            }
            return t;
        }

        const unsigned char first = static_cast<unsigned char>(c);
        t.type_ = (std::isalpha(first) || c == '_') ? TT_Id : TT_Op;

        if (token_text_is(t.s_, len, "unsigned")) {
            // Merge "unsigned <base type>" into one token; any other
            // follower (a name, ';', ...) stays a separate token.
            const char* ahead = str;
            const Token next = tokenize(ahead);
            const size_t next_len = static_cast<size_t>(next.e_ - next.s_);
            if (token_text_is(next.s_, next_len, "int") ||
                token_text_is(next.s_, next_len, "short") ||
                token_text_is(next.s_, next_len, "char") ||
                token_text_is(next.s_, next_len, "long")) {
                t.e_ = next.e_;
                str = ahead;
            }
        }
        return t;
    }

    // Punctuation: one character unless it starts "==" or "->".
    ++str;
    switch (c) {
    case '=':
        if (*str == '=') {
            ++str;
            t.type_ = TT_Equal;
        } else {
            t.type_ = TT_Assign;
        }
        break;
    case '-':
        if (*str == '>') {
            ++str;
            t.type_ = TT_Arrow;
        } else {
            t.type_ = TT_Op;
        }
        break;
    case '.': t.type_ = TT_Dot; break;
    case ';': t.type_ = TT_Semi; break;
    case '(': t.type_ = TT_Left_Paren; break;
    case ')': t.type_ = TT_Right_Paren; break;
    case '{': t.type_ = TT_Left_Brace; break;
    case '}': t.type_ = TT_Right_Brace; break;
    case ':': t.type_ = TT_Colon; break;
    case '<': t.type_ = TT_Left_Bracket; break;
    case '>': t.type_ = TT_Right_Bracket; break;
    default:  t.type_ = TT_Op; break;
    }
    t.e_ = str;
    return t;
}
void GetStatement(const char* &str, Exp& exp) {
Token t;
int left_brace = 0;
while (true) {
t = tokenize(str);
if (t.type_ == TT_Eof) {
break;
} else if (t.type_ == TT_Left_Brace) {
if (exp.exp_.empty()) {
exp.exp_.push_back(t);
break;
}
left_brace++;
} else if (t.type_ == TT_Right_Brace) {
if (left_brace > 0) {
left_brace--;
}
if (left_brace == 0) {
exp.exp_.push_back(t);
break;
}
} else if (t.type_ == TT_Semi) {
if (!exp.exp_.empty() && !left_brace) break;
}
exp.exp_.push_back(t);
t.type_ = TT_None;
}
size_t token_num = exp.exp_.size();
if (exp.exp_[0].type_ == TT_Left_Brace) {
exp.type_ = ET_Scope;
} else if (token_num <= 2) {
exp.type_ = ET_NOP; // eg, "var;"
} else if (exp.exp_[0].type_ == TT_Id && exp.exp_[1].type_ == TT_Id) {
if (exp.exp_[2].type_ == TT_Left_Paren) {
if (t.type_ == TT_Right_Brace) exp.type_ = ET_FuncImpl;
else exp.type_ = ET_Decl; // ET_FuncDecl;
} else {
exp.type_ = ET_Decl;
}
} else if (exp.exp_[0].type_ == TT_Id) {
if (exp.exp_[1].type_ == TT_Assign) {
exp.type_ = ET_Assign;
} else if (exp.exp_[1].type_ == TT_Left_Bracket) {
// template
// TODO
assert(0);
}else {
assert(0);
}
} else if (exp.exp_[0].type_ == TT_Class) {
if (exp.exp_[3].type_ == TT_Colon || exp.exp_[3].type_ == TT_Left_Brace) {
exp.type_ = ET_ClassImpl;
} else {
std::swap(exp.exp_[0], exp.exp_[exp.exp_.size()-1]);
exp.exp_.erase(exp.exp_.begin() + exp.exp_.size() - 1);
exp.type_ = ET_Decl;
}
} else {
assert(0);
}
}
// Maps the text of a type-name token to a numeric type id.
//
// Built-in types get fixed ids:
//   int=1, unsigned int=2, short=3, unsigned short=4,
//   char=5, unsigned char=6, float=7, double=8.
// Any other text is treated as a user-defined type: the first time a name is
// seen it is assigned the next id starting from 512, and later lookups of the
// same name return the same id.
//
// The whole token text [s_, e_) must match a built-in name; a token such as
// "integer" is a user type, not an int. Runs of whitespace inside the token
// are collapsed to one space, so "unsigned   int" is recognised as
// "unsigned int". A token without text (NULL pointers) is treated as an
// empty name.
//
// The user-type table lives in function-local statics and is not
// synchronized.
int token2type(const Token& t) {
    std::string text;
    if (t.s_ != NULL && t.e_ != NULL) {
        for (const char* p = t.s_; p < t.e_; ++p) {
            if (std::isspace(static_cast<unsigned char>(*p))) {
                if (!text.empty() && text[text.size() - 1] != ' ') text += ' ';
            } else {
                text += *p;
            }
        }
    }

    static const struct {
        const char* name;
        int id;
    } kBuiltin[] = {
        {"int", 1},
        {"unsigned int", 2},
        {"short", 3},
        {"unsigned short", 4},
        {"char", 5},
        {"unsigned char", 6},
        {"float", 7},
        {"double", 8},
    };
    const size_t builtin_count = sizeof(kBuiltin) / sizeof(kBuiltin[0]);
    for (size_t i = 0; i < builtin_count; ++i) {
        if (text == kBuiltin[i].name) return kBuiltin[i].id;
    }

    static int s_user_type = 512;
    static std::map<std::string, int> s_type_m;
    const std::map<std::string, int>::const_iterator it = s_type_m.find(text);
    if (it != s_type_m.end()) return it->second;

    const int id = s_user_type++;
    s_type_m[text] = id;
    return id;
}
void analyze(const char* buff, std::map<std::string, int>& func_call) {
Exp exp;
exp.exp_.reserve(32);
while (*buff) {
exp.reset();
GetStatement(buff, exp);
switch (exp.type_) {
case ET_FuncDecl:
break;
case ET_Decl:
{
Item item;
item.type_ = token2type(exp.exp_[0]);
item.name_ = std::string(exp.exp_[1].s_, exp.exp_[1].e_);
}
break;
case ET_FuncImpl:
{
std::cout << "func impl\n";
ParseFunction(exp.exp_);
}
break;
case ET_Assign:
break;
case ET_ClassImpl:
break;
}
}
}
// Demo driver: analyzes a small hard-coded snippet and then prints every
// "name:count" entry of the resulting map, one per line.
int main() {
    // Adjacent literals are concatenated by the compiler into one string.
    const char* txt =
        "void foo() { voo(); }\n"
        "int main() { int a = 3; b = 4; foo();foo(); unsigned int v = 23 + a; voo(); return a + b; }";

    std::map<std::string, int> func;
    analyze(txt, func);

    typedef std::map<std::string, int>::const_iterator FuncIter;
    FuncIter it = func.begin();
    while (it != func.end()) {
        std::cout << it->first << ":" << it->second << std::endl;
        ++it;
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment