Created
December 13, 2014 07:59
-
-
Save Wizmann/35ebc4da3b8c074a0fb3 to your computer and use it in GitHub Desktop.
simple_lexer.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdio> | |
#include <cstdlib> | |
#include <cstring> | |
#include <iostream> | |
#include <algorithm> | |
#include <vector> | |
#include <map> | |
#include <unordered_map> | |
using namespace std; | |
#define print(x) cout << x << endl | |
#define input(x) cin >> x | |
const vector<string> keywords = { | |
"auto", "break", "case", "char", | |
"const", "continue", "default", "do", | |
"double", "else", "enum", "extern", | |
"float", "for", "goto", "if", | |
"int", "long", "register", "return", | |
"short", "signed", "sizeof", "static", | |
"struct", "switch", "typedef", "union", | |
"unsigned", "void", "volatile", "while" | |
}; | |
class Trie { | |
public: | |
void build(const vector<string>& str_vec) { | |
trie_init(); | |
for (const auto& str: str_vec) { | |
trie_insert(str); | |
} | |
} | |
int search(int status, char c) { | |
auto& mp = _trie[status]; | |
if (mp.find(c) == mp.end()) { | |
return -1; | |
} | |
return mp[c]; | |
} | |
bool is_keyword(int status) { | |
if (status == -1) { | |
return false; | |
} | |
auto& mp = _trie[status]; | |
return mp.find('\0') != mp.end(); | |
} | |
private: | |
void trie_init() { | |
_trie.clear(); | |
_trie.reserve(10000); | |
_trie.push_back(unordered_map<char, int>()); | |
} | |
void trie_insert(const string& str) { | |
int ptr = 0; | |
for (auto c: str) { | |
auto& mp = _trie[ptr]; | |
if (mp.find(c) == mp.end()) { | |
_trie.push_back(unordered_map<char, int>()); | |
mp[c] = _trie.size() - 1; | |
} | |
ptr = mp[c]; | |
} | |
auto& mp = _trie[ptr]; | |
mp['\0'] = 1; // mark this as the end of a keyword | |
} | |
private: | |
vector<unordered_map<char, int> > _trie; | |
}; | |
class TrieAdapter { | |
public: | |
TrieAdapter(Trie& trie): _status(0), _trie(trie) {} | |
void add(const char c) { | |
if (_status == -1) { | |
return; | |
} | |
_status = _trie.search(_status, c); | |
} | |
bool is_keyword() { | |
return _trie.is_keyword(_status); | |
} | |
void reset() { | |
_status = 0; | |
} | |
private: | |
int _status; | |
Trie& _trie; | |
}; | |
class Word { | |
public: | |
Word(): _row(0), _col(0) {} | |
Word(int row, int col): _row(row), _col(col), _word() {} | |
bool is_empty() { | |
return _word.empty(); | |
} | |
void add(const char c) { | |
_word += c; | |
} | |
void show(bool is_keyword) { | |
if (is_keyword) { | |
printf("%s\t(%d, %d)", | |
_word.c_str(), _row, _col); | |
} else { | |
printf("ID(%s)\t(%d, %d)", | |
_word.c_str(), _row, _col); | |
} | |
puts(""); | |
} | |
void clear() { | |
_word = string(); | |
} | |
private: | |
int _row, _col; | |
string _word; | |
}; | |
int main() { | |
freopen("input.txt", "r", stdin); | |
char c; | |
int row = 0, col = 0; | |
Trie trie; | |
trie.build(keywords); | |
TrieAdapter adapter(trie); | |
Word word; | |
while (true) { | |
c = '\0'; | |
bool is_eof = !cin.get(c); | |
if (word.is_empty()) { | |
word = Word(row, col); | |
} | |
if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\0') { | |
if (!word.is_empty()) { | |
bool is_keyword = adapter.is_keyword(); | |
word.show(is_keyword); | |
} | |
word.clear(); | |
adapter.reset(); | |
} else { | |
adapter.add(c); | |
word.add(c); | |
} | |
if (c == '\n') { | |
row++; | |
col = 0; | |
} else { | |
col++; | |
} | |
if (is_eof) { | |
break; | |
} | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment