Skip to content

Instantly share code, notes, and snippets.

@GnsP
Created January 24, 2016 14:57
Show Gist options
  • Save GnsP/e9b462f5adf740a5612e to your computer and use it in GitHub Desktop.
Save GnsP/e9b462f5adf740a5612e to your computer and use it in GitHub Desktop.
Solution to AttributeParser problem on Code.Cpp3 contest (HackerRank)
#include <cmath>
#include <cstdio>
#include <vector>
#include <map>
#include <utility>
#include <iostream>
#include <algorithm>
#include <cstring>
#include <string>
#include <cctype>
#include <cassert>
using namespace std;
void error(const char *err){
cout << err << endl;
return;
}
enum TokenType{ SYMBOL = 0, OPERATOR, STRING , _NONE };
const char *typeString(TokenType t){
switch(t){
case SYMBOL:
return "SYMBOL";
case OPERATOR:
return "OPERATOR";
case STRING:
return "STRING";
case _NONE:
return "_NONE";
default:
return "ERROR";
}
}
enum Operator{ DIV=15, NOT, DOT, GT, LT, ASSIGN, ERROR };
const char *opString(Operator op){
switch(op){
case DIV: return "DIV";
case NOT: return "NOT";
case GT: return "GT";
case LT: return "LT";
case ASSIGN:return "ASSIGN";
case DOT: return "DOT";
default: return "\0";
}
}
union TokenHolder{
char str[256];
char sym[256];
Operator op;
};
Operator str2op(const char *str){
switch(str[0]){
case '~': return NOT;
case '>': return GT;
case '<':
if(str[1] == '/') return DIV;
return LT;
case '=': return ASSIGN;
case '.': return DOT;
default : return ERROR;
}
}
class Token{
public:
Token():type_(_NONE){}
Token(TokenType type, const char *tok_str){
makeToken(type, tok_str);
}
Token (const Token &oldToken){
type_ = oldToken.type_;
token_ = oldToken.token_;
}
void print(){
cout<<"< "<<typeString(type_)<<", ";
switch(type_){
case SYMBOL: cout<<token_.sym<<" >"; break;
case STRING: cout<<token_.str<<" >"; break;
case OPERATOR: cout<<opString(token_.op)<<" >"; break;
case _NONE: cout<<" >"; break;
default: cout<<" >"; break;
}
cout << endl;
}
void makeToken(TokenType type, const char *tok_str){
type_ = type;
switch(type_){
case SYMBOL:
strcpy( token_.sym, tok_str);
break;
case STRING:
strcpy(token_.str, tok_str);
break;
case OPERATOR:
token_.op = str2op(tok_str);
break;
case _NONE:
break;
default:
cout<<"ERROR OCCURED IN LEXICAL ANALYSIS"<<endl;
break;
}
}
TokenType getType(){ return type_; }
TokenType type_;
TokenHolder token_;
};
class Tokenizer{
public:
Tokenizer():expr_(NULL),type_(_NONE){}
Tokenizer(const char *expr):expr_(expr){}
bool next(Token &t){
bool ret = getNextToken();
t.makeToken(type_, token_);
return ret;
}
const char* getExpr(){ return expr_; }
private:
const char *expr_;
char token_[256];
TokenType type_;
bool isOperator(char ch){
if(strchr("</>.~=", ch)) return true;
return false;
}
bool getNextToken(){
char *temp;
type_ = _NONE;
temp = token_;
*temp = '\0';
if(*expr_ == '\0') return false;
while(isspace(*expr_)) {
expr_++ ;
}
if( isOperator(*expr_)){
while(isOperator(*expr_)){
*temp++ = *expr_++;
}
type_ = OPERATOR;
}
else if(isalnum(*expr_)){
while(isalnum(*expr_) || *expr_ == '_'){
*temp++ = *expr_++;
}
type_ = SYMBOL;
}
else if(*expr_ == '\"'){
*expr_++;
while(*expr_ != '\"'){
*temp++ = *expr_++;
}
*expr_++;
type_ = STRING;
}
else{
while(!isspace(*expr_)){
*temp++ = *expr_++;
}
type_ = SYMBOL;
}
*temp = '\0';
return true;
}
};
struct query{
vector<string> tags;
string attr;
};
struct attr{
string attrName;
string attrVal;
bool notEmpty;
attr():notEmpty(false){}
};
struct tag{
string tagName;
vector<attr> attrs;
vector<tag> tags;
bool notEmpty;
tag():notEmpty(false){}
tag findTag(const string &tg, bool &found){
for(int i=0; i<tags.size(); i++){
if(tags[i].tagName == tg){
found = true;
return tags[i];
}
}
tag t;
found = false;
return t;
}
string solveQuery(query &qry, int index, bool &found){
if(index < qry.tags.size()){
tag t = findTag(qry.tags[index], found);
if(!found) return string("Not Found!");
string res = t.solveQuery(qry, index+1, found);
if(!found) return string("Not Found!");
return res;
}
for(int i=0; i<attrs.size(); i++){
if(attrs[i].attrName == qry.attr){
found=true;
return attrs[i].attrVal;
}
}
found = false;
return string("Not Found");
}
void print(){
cout << "<TAG>" << endl;
cout << "\tname = " << tagName << endl;
cout << "\t<ATTRS>" << endl;
for(int i=0; i<attrs.size(); i++){
cout << "\t\t" << attrs[i].attrName << " = " << attrs[i].attrVal << endl;
}
cout << "\t</ATTRS>" << endl;
for(int i=0; i<tags.size(); i++) tags[i].print();
cout << "</TAG>" << endl << endl;
}
};
struct source{
vector<tag> tags;
tag findTag(const string &tg, bool &found){
for(int i=0; i<tags.size(); i++){
if(tags[i].tagName == tg){
found = true;
return tags[i];
}
}
found = false;
return tags[0];
}
void solveQuery(query &qry){
string tg = qry.tags[0];
bool found;
tag t = findTag(tg, found);
if(!found){
cout << "Not Found!" << endl;
return;
}
string val = t.solveQuery(qry, 1, found);
if(!found){
cout << "Not Found!" << endl;
return;
}
cout << val << endl;
return;
}
void print(){
cout << "<SOURCE size="<< tags.size() <<">" << endl << endl;
for(int i=0; i<tags.size(); i++) tags[i].print();
cout << endl << endl << "</SOURCE>" << endl;
cout.flush();
}
};
class SourceParser{
public:
SourceParser(char *prog):lex_(prog),prog_(prog){}
void parse(source &src_){
check = lex_.next(t);
while(check && t.type_ == OPERATOR && t.token_.op == LT){
tag tg;
parseTag(tg);
src_.tags.push_back(tg);
check = lex_.next(t);
if(!check) break;
}
}
source getSourceTree(){ return src_; }
private:
Tokenizer lex_;
source src_;
char *prog_;
Token t;
bool check;
void parseTag(tag &ent){
check = lex_.next(t);
//assert(t.type_ == SYMBOL);
ent.tagName = string(t.token_.sym);
ent.notEmpty = true;
check = lex_.next(t);
while(!(t.type_ == OPERATOR && t.token_.op == GT)){
attr a;
parseAttr(a);
if(a.notEmpty) ent.attrs.push_back(a);
}
check = lex_.next(t);
while(t.type_ == OPERATOR && t.token_.op == LT){
tag tg_;
parseTag(tg_);
if(tg_.notEmpty) ent.tags.push_back(tg_);
check = lex_.next(t);
}
// take the ending tag and do nothing to store that piece of shit
// only 2 tokens <tagName><GT> should be left from the end tag
// when we get here, assuming that there are no syntactical errors
// in the source provided.
// somehow I am being fucked here, but I have been fucked up worse !!
check = lex_.next(t);
check = lex_.next(t);
}
void parseAttr(attr &a){
if(t.type_ == SYMBOL){
a.notEmpty = true;
a.attrName = string(t.token_.sym);
// read the = token and throw it away
check = lex_.next(t);
check = lex_.next(t);
a.attrVal = string(t.token_.str);
check = lex_.next(t);
}
}
};
class QueryParser{
public:
QueryParser(char *prog):lex_(prog),prog_(prog){}
void parse(){
check = lex_.next(t);
string tg = string(t.token_.sym);
qry_.tags.push_back(tg);
check = lex_.next(t);
while(t.type_ == OPERATOR && t.token_.op == DOT){
string tg;
check = lex_.next(t);
tg = string(t.token_.sym);
qry_.tags.push_back(tg);
check = lex_.next(t);
}
check = lex_.next(t);
qry_.attr = string(t.token_.sym);
}
query getQueryTree() { return qry_; }
private:
Tokenizer lex_;
query qry_;
char *prog_;
Token t;
bool check;
};
void readSource(char *prog, int lines){
char line[256];
int len = 0, stlen;
for(int i=0; i<lines; i++){
cin.getline(line, 256);
stlen = strlen(line);
strcpy(prog+len, line);
len += stlen;
*(prog+len) = '\n';
len++;
}
*(prog+len) = '\0';
}
int main(){
int N, Q;
char prog[40000];
cin >> N >> Q;
cin.getline(prog, 40000);
readSource(prog, N);
source src;
SourceParser sourceParser(prog);
sourceParser.parse(src);
for(int i=0; i<Q; i++){
cin.getline(prog, 40000);
query qry;
QueryParser queryParser(prog);
queryParser.parse();
qry = queryParser.getQueryTree();
src.solveQuery(qry);
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment