Created
July 13, 2016 05:14
-
-
Save codepainkiller/dfeb4c167197b934ce0f4006f446f36b to your computer and use it in GitHub Desktop.
Analizador Léxico
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <cstring> | |
#include <algorithm> | |
#include <vector> | |
#include <stack> | |
#include <map> | |
#include <fstream> | |
using namespace std; | |
// Printable names of the automaton's accepting states, keyed by state number.
// State 3 has no entry: it is the intermediate state reached after ':' and is
// not listed among the final states.
const std::map<int, std::string> TOKENS{
    { 1, "IDENTIFICADOR"},
    { 2, "ENTERO"},
    { 4, "ASIGNACION"},
    { 5, "SUMA"},
    { 6, "MULTIPLICACION"},
    { 7, "PARENTESIS_IZQ"},
    { 8, "PARENTESIS_DER"},
    { 9, "PUNTO_COMA"},
    {10, "ERROR"},
    {11, "FIN"},
};
// Number of entries in FINAL_STATES.
const unsigned int NUM_FS = 10;

// Transition table of the automaton: MATRIX[current state][symbol class]
// gives the next state. Symbol classes are the column indexes produced by
// charsToIndexes:
//   0 letter, 1 digit, 2 ':', 3 '=', 4 '+', 5 '*', 6 '(', 7 ')', 8 ';',
//   9 '$', 10 any other character.
const unsigned int MATRIX[12][11] = {
    /* state  0 */ { 1,  2,  3, 10,  5,  6,  7,  8,  9, 11, 10},
    /* state  1 */ { 1,  1,  3, 10,  5,  6,  7,  8,  9, 11, 10},
    /* state  2 */ { 1,  2,  3, 10,  5,  6,  7,  8,  9, 11, 10},
    /* state  3 */ {10, 10, 10,  4, 10, 10, 10, 10, 10, 10, 10},
    /* state  4 */ { 1,  2,  3, 10,  5,  6,  7,  8,  9, 11, 10},
    /* state  5 */ { 1,  2,  3, 10,  5,  6,  7,  8,  9, 11, 10},
    /* state  6 */ { 1,  2,  3, 10,  5,  6,  7,  8,  9, 11, 10},
    /* state  7 */ { 1,  2,  3, 10,  5,  6,  7,  8,  9, 11, 10},
    /* state  8 */ { 1,  2,  3, 10,  5,  6,  7,  8,  9, 11, 10},
    /* state  9 */ { 1,  2,  3, 10,  5,  6,  7,  8,  9, 11, 10},
    /* state 10 */ { 1,  2,  3, 10,  5,  6,  7,  8,  9, 11, 10},
    // The last row was previously left to implicit zero-initialization; it is
    // spelled out here so it is visible that state 11 leads back to state 0.
    /* state 11 */ { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0},
};

// State the automaton starts in.
const unsigned int INITIAL_STATE = 0;

// Accepting states: reaching one of these may emit a token.
const unsigned int FINAL_STATES[NUM_FS] = {
    1, 2, 4, 5, 6, 7, 8, 9, 10, 11,
};
// Writes every element of the container `v` to standard output, each one
// followed by a single space, then terminates the line and flushes the stream.
template <typename Container>
void print(Container const& v)
{
    for (auto const& element : v) {
        std::cout << element << ' ';
    }
    std::cout << std::endl;
}
// Prepares raw source text for the lexer: removes every space, tab, line feed
// and carriage return, then appends '$' as the end-of-input marker.
//
// Carriage returns are stripped as well so that text read from files with CRLF
// line endings does not keep stray '\r' characters, which would otherwise be
// classified as invalid symbols.
string parseSource(string str)
{
    const auto isBlank = [](char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    };

    // Single pass over the text instead of one erase/remove pass per character.
    str.erase(std::remove_if(str.begin(), str.end(), isBlank), str.end());
    str.push_back('$');
    return str;
}
string readFile(string path) | |
{ | |
string line; | |
string sourceCode = ""; | |
ifstream myfile (path.c_str()); | |
if (myfile.is_open()) { | |
cout << "Leyendo archivo..." << endl << endl; | |
while ( getline (myfile,line) ) { | |
cout << line << '\n'; | |
sourceCode += line; | |
} | |
myfile.close(); | |
} | |
else { | |
cout << "Archivo no encontrado." << endl; | |
exit(1); | |
} | |
return parseSource(sourceCode); | |
} | |
bool isFinalState(unsigned int state) | |
{ | |
for (unsigned int i = 0; i < NUM_FS; i++) { | |
if (FINAL_STATES[i] == state) { | |
return true; | |
} | |
} | |
return false; | |
} | |
vector<unsigned int> automata(vector<unsigned int> w) | |
{ | |
unsigned int q, s; | |
vector<unsigned int> tokens; | |
q = INITIAL_STATE; | |
for(int i = 0; i < w.size(); i++){ | |
s = w.at(i); | |
q = MATRIX[q][s]; | |
if (isFinalState(q)) { | |
if (tokens.empty()) { | |
tokens.push_back(q); | |
} else if (q != tokens.back()) { | |
tokens.push_back(q); | |
} | |
} | |
} | |
return tokens; | |
} | |
// Translates each character of `word` into the MATRIX column index of its
// symbol class and returns the indexes in the same order:
//   0 letter, 1 digit, 2 ':', 3 '=', 4 '+', 5 '*', 6 '(', 7 ')', 8 ';',
//   9 '$', 10 any other character.
vector<unsigned int> charsToIndexes(string word)
{
    vector<unsigned int> wordIndexes;
    wordIndexes.reserve(word.size());

    for (char c : word) {
        // isalpha/isdigit require a value representable as unsigned char;
        // passing a negative plain char (non-ASCII byte) is undefined behavior.
        const unsigned char uc = static_cast<unsigned char>(c);

        unsigned int column;
        if (isalpha(uc)) {
            column = 0;
        } else if (isdigit(uc)) {
            column = 1;
        } else {
            switch (c) {
                case ':': column = 2;  break;
                case '=': column = 3;  break;
                case '+': column = 4;  break;
                case '*': column = 5;  break;
                case '(': column = 6;  break;
                case ')': column = 7;  break;
                case ';': column = 8;  break;
                case '$': column = 9;  break;
                default:  column = 10; break;
            }
        }
        wordIndexes.push_back(column);
    }
    return wordIndexes;
}
int main() | |
{ | |
string code = readFile("source.txt"); | |
vector<unsigned int> tokens; | |
tokens = automata(charsToIndexes(code)); | |
cout << endl << "Tokens generados..." << endl << endl; | |
for(unsigned int i = 0; i < tokens.size(); i++) { | |
cout << TOKENS.at(tokens.at(i)) << endl; | |
} | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
AA := (BB + 5) * CC; | |
DD =: 458##; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment