Created
May 3, 2019 10:59
-
-
Save native-m/6513b6a1aab8a82843c39d20a8df41a8 to your computer and use it in GitHub Desktop.
i made a f-ing lexical analyzer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cctype>
#include <iostream>
#include <string>
#include <vector>
// Every kind of token the lexer can report.
//
// Enumerator values are assigned implicitly, starting at 0, in declaration
// order.
enum TokenType
{
    // --- Operators -------------------------------------------------------
    TK_PLUS,        // "+"  : addition or unary plus
    TK_MIN,         // "-"  : subtraction or negation
    TK_ASTERISK,    // "*"  : multiplication or pointer dereference
    TK_SLASH,       // "/"
    TK_INC,         // "++"
    TK_DEC,         // "--"
    TK_EQ,          // "=="
    TK_NE,          // "!="
    TK_GT,          // ">"
    TK_LT,          // "<"
    TK_GE,          // ">="
    TK_LE,          // "<="
    TK_RSH,         // ">>"
    TK_LSH,         // "<<"
    TK_ASSIGN,      // "="

    // --- Keywords --------------------------------------------------------
    // Don't change the order! getKeyword() computes the token type as
    // TK_IF + index into its keyword table ("if", "else", "for", "while",
    // "do"), so these enumerators must stay contiguous and in that order.
    TK_IF,
    TK_ELSE,
    TK_FOR,
    TK_WHILE,
    TK_DO,

    // --- Literals and names ----------------------------------------------
    TK_INT,         // integer literal
    TK_FLOAT,       // floating-point literal
    TK_IDENTIFIER,  // user-defined name

    // --- Failure marker --------------------------------------------------
    TK_INVALID      // returned by the lexer functions when nothing matched
};
| struct Token | |
| { | |
| std::string str; | |
| TokenType tokenType; | |
| }; | |
// Reports whether the character immediately AFTER `*iter` equals `ch`.
//
// This is a pure one-character lookahead: neither iterator is modified.
//
//   ch    character expected at position `iter + 1`
//   iter  current position; may be equal to `eof`
//   eof   one-past-the-end iterator of the text being scanned
//
// Returns true only when `iter + 1` is inside the input and holds `ch`.
bool matchNextChar(char ch, std::string::iterator& iter, std::string::iterator& eof)
{
    // Advancing an iterator that already sits on `eof` is undefined
    // behaviour, so reject that case before looking ahead.
    if (iter == eof)
        return false;

    const std::string::iterator next = iter + 1;
    return next != eof && *next == ch;
}
| Token getOperator(std::string::iterator& iter, std::string::iterator& eof) | |
| { | |
| std::string::iterator old = iter; | |
| char ch = *iter++; | |
| switch (ch) | |
| { | |
| case '+': | |
| if (matchNextChar('+', iter, eof)) return { "++", TK_INC }; | |
| return { "+", TK_PLUS }; | |
| case '-': | |
| if (matchNextChar('-', iter, eof)) return { "--", TK_DEC }; | |
| return { "-", TK_MIN }; | |
| case '*': | |
| return { "*", TK_ASTERISK }; | |
| case '/': | |
| return { "/", TK_SLASH }; | |
| case '>': | |
| if (matchNextChar('=', iter, eof)) return { ">=", TK_GE }; | |
| else if (matchNextChar('>', iter, eof)) return { ">>", TK_RSH }; | |
| return { ">", TK_GT }; | |
| case '<': | |
| if (matchNextChar('=', iter, eof)) return { "<=", TK_LE }; | |
| else if (matchNextChar('<', iter, eof)) return { "<<", TK_LSH }; | |
| return { "<", TK_LT }; | |
| case '=': | |
| if (matchNextChar('=', iter, eof)) return{ ">=", TK_EQ }; | |
| return { "-", TK_ASSIGN }; | |
| default: | |
| break; | |
| } | |
| iter = old; | |
| return { "", TK_INVALID }; | |
| } | |
// Tests whether the text at `b` is exactly the keyword `a`.
//
// A match requires every character of `a` to be present AND the character
// that follows it not to be a letter, digit or underscore, so "iffy" does not
// match "if" and a text that ends after "i" does not match "if" either.
//
//   a  NUL-terminated keyword to look for
//   b  current position; advanced past the keyword on success, left
//      untouched on failure
//
// NOTE: there is no end iterator in this signature, so (as before) the
// function relies on the scanned std::string's storage being followed by a
// '\0' terminator to detect the end of the text.
bool compareKeyword(const char* a, std::string::iterator& b)
{
    std::string::iterator probe = b;

    while (*a != '\0')
    {
        // Also rejects a text that ends early: the terminator never equals
        // a keyword character.
        if (*probe != *a)
            return false;
        ++probe;
        ++a;
    }

    // Word-boundary check: the keyword must not be the prefix of a longer name.
    const unsigned char following = static_cast<unsigned char>(*probe);
    if (std::isalnum(following) || following == '_')
        return false;

    b = probe;
    return true;
}
| Token getKeyword(std::string::iterator& iter) | |
| { | |
| static const char* keywords[] = { | |
| "if", "else", "for", "while", "do" | |
| }; | |
| static const int keynum = sizeof(keywords) / sizeof(keywords[0]); | |
| std::string::iterator old = iter; | |
| const char* k; | |
| for (int i = 0; i < keynum; i++) | |
| { | |
| k = keywords[i]; | |
| if (compareKeyword(k, iter)) | |
| return { k, (TokenType)(TK_IF + i) }; | |
| iter = old; | |
| } | |
| return { "", TK_INVALID }; | |
| } | |
| Token getIdentifier(std::string::iterator& iter, std::string::iterator& eof) | |
| { | |
| Token ret; | |
| std::string::iterator first = iter; | |
| std::string::iterator last; | |
| int numAlpha = 0; | |
| bool isValidIdentifier = false; | |
| isValidIdentifier = isalpha(*iter) > 0; | |
| while (iter != eof && (isalnum(*iter) || *iter == '_') && *iter++) | |
| { | |
| if (iter != eof && isalpha(*iter)) | |
| numAlpha++; | |
| } | |
| last = iter; | |
| if (isValidIdentifier) | |
| ret = { std::string(first, last), TK_IDENTIFIER }; | |
| else | |
| { | |
| if(numAlpha < 1) | |
| iter = first; | |
| ret = { "", TK_INVALID }; | |
| } | |
| return ret; | |
| } | |
| TokenType isInteger(std::string::iterator& iter, std::string::iterator& eof) | |
| { | |
| int numDigits = 0; | |
| while (iter != eof && isdigit(*iter) && *iter++) | |
| numDigits++; | |
| if (numDigits > 0) | |
| return TK_INT; | |
| return TK_INVALID; | |
| } | |
| TokenType isFloat(std::string::iterator& iter, std::string::iterator& eof) | |
| { | |
| bool hasPoint = false; | |
| bool hasSuffix = false; | |
| int numDigits = 0; | |
| while (iter != eof && isdigit(*iter) && *iter++) | |
| numDigits++; | |
| if (iter != eof && *iter == '.') | |
| { | |
| hasPoint = true; | |
| iter++; | |
| while (iter != eof && isdigit(*iter) && *iter++) | |
| numDigits++; | |
| } | |
| if (iter != eof && *iter == 'f' && iter++ == iter) | |
| hasSuffix = true; | |
| if (numDigits > 0 && (hasPoint || hasSuffix)) | |
| return TK_FLOAT; | |
| return TK_INVALID; | |
| } | |
| Token getNumber(std::string::iterator& iter, std::string::iterator& eof) | |
| { | |
| std::string::iterator old = iter; | |
| TokenType t; | |
| if ((t = isFloat(iter, eof)) != TK_INVALID) | |
| return{ std::string(old, iter), t }; | |
| iter = old; | |
| if ((t = isInteger(iter, eof)) != TK_INVALID) | |
| return { std::string(old, iter), t }; | |
| iter = old; | |
| return { "", TK_INVALID }; | |
| } | |
// Advances `iter` past every consecutive whitespace character.
//
//   iter  current position; left on the first non-whitespace character, or
//         on `eof` when only whitespace remains
//   eof   one-past-the-end iterator of the text being scanned
void skipWhitespace(std::string::iterator& iter, std::string::iterator& eof)
{
    // std::isspace requires a value representable as unsigned char.
    while (iter != eof && std::isspace(static_cast<unsigned char>(*iter)))
        ++iter;
}
| bool skipComments(std::string::iterator& iter, std::string::iterator& eof) | |
| { | |
| char c = *iter; | |
| if (iter != eof && c == '/') | |
| { | |
| if (matchNextChar('/', iter, eof)) | |
| { | |
| while (iter != eof && *iter++ != '\n'); // loop until newline's found | |
| return true; | |
| } | |
| else if (matchNextChar('*', iter, eof)) | |
| { | |
| bool star = false; | |
| // loop until "*/" pattern is matched | |
| while (iter != eof) | |
| { | |
| c = *iter++; | |
| if (c == 0 || (c == '/' && star)) | |
| break; | |
| star = (c == '*'); | |
| } | |
| return true; | |
| } | |
| } | |
| return false; | |
| } | |
| std::vector<Token> generateTokens(std::string& code) | |
| { | |
| std::vector<Token> tokens; | |
| std::string::iterator iter = code.begin(); | |
| std::string::iterator end = code.end(); | |
| while (iter != end) | |
| { | |
| Token tok; | |
| skipWhitespace(iter, end); | |
| if (skipComments(iter, end)) | |
| continue; | |
| if (iter != end && (tok = getKeyword(iter)).tokenType != TK_INVALID) tokens.push_back(tok); | |
| else if (iter != end && (tok = getIdentifier(iter, end)).tokenType != TK_INVALID) tokens.push_back(tok); | |
| else if (iter != end && (tok = getOperator(iter, end)).tokenType != TK_INVALID) tokens.push_back(tok); | |
| else if (iter != end && (tok = getNumber(iter, end)).tokenType != TK_INVALID) tokens.push_back(tok); | |
| else | |
| { | |
| tokens.push_back({ "", TK_INVALID }); | |
| } | |
| } | |
| return tokens; | |
| } | |
| // TESTBENCH HERE | |
| int main() | |
| { | |
| std::string buf; | |
| std::vector<Token> tokens; | |
| buf = "100.+100+a"; | |
| tokens = generateTokens(buf); | |
| //while (true) | |
| //{ | |
| // std::cout << ">"; | |
| // std::getline(std::cin, buf); | |
| // tokens = generateTokens(buf); | |
| // | |
| for (auto& i : tokens) | |
| { | |
| std::cout << i.str << ", " << i.tokenType << std::endl; | |
| } | |
| // | |
| // std::cout << buf << std::endl; | |
| //} | |
| std::cin.get(); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment