Skip to content

Instantly share code, notes, and snippets.

@native-m
Created May 3, 2019 10:59
Show Gist options
  • Select an option

  • Save native-m/6513b6a1aab8a82843c39d20a8df41a8 to your computer and use it in GitHub Desktop.

Select an option

Save native-m/6513b6a1aab8a82843c39d20a8df41a8 to your computer and use it in GitHub Desktop.
i made a f-ing lexical analyzer
#include <cctype>
#include <iostream>
#include <string>
#include <vector>
// Kinds of token the lexer can report.
// Operators are classified by spelling only; the lexer does not tell unary
// and binary uses of the same symbol apart.
enum TokenType
{
TK_PLUS, // '+': may be the addition operator or a unary plus
TK_MIN, // '-': may be the subtraction operator or the negation operator
TK_ASTERISK, // '*': may be the multiplication operator or a pointer dereference
TK_SLASH, // '/'
TK_INC, // "++"
TK_DEC, // "--"
TK_EQ, // "==" comparison
TK_NE, // presumably "!=" (name suggests not-equal)
TK_GT, // '>'
TK_LT, // '<'
TK_GE, // ">="
TK_LE, // "<="
TK_RSH, // ">>"
TK_LSH, // "<<"
TK_ASSIGN, // '=' assignment
// Keyword tokens. Don't change the order! getKeyword() computes the token
// type as TK_IF + index into its keyword table ("if", "else", "for",
// "while", "do"), so these must stay contiguous and in that order.
TK_IF,
TK_ELSE,
TK_FOR,
TK_WHILE,
TK_DO,
TK_INT, // integer literal (see isInteger)
TK_FLOAT, // floating-point literal (see isFloat)
TK_IDENTIFIER, // identifier (see getIdentifier)
TK_INVALID // returned by every lexer function when it finds no match
};
// One lexed token: its text together with its kind.
// Keep the member order: tokens are created throughout the file with
// positional brace initialization such as { "+", TK_PLUS }.
struct Token
{
std::string str; // text associated with the token
TokenType tokenType; // classification of the token
};
// Peeks at the character that FOLLOWS the one `iter` currently points at.
//
// ch   - character expected at position iter + 1
// iter - current position; never modified by this function
// eof  - end of the input
//
// Returns true only when both `iter` and `iter + 1` are inside the input and
// the character at `iter + 1` equals `ch`. Nothing is consumed.
bool matchNextChar(char ch, std::string::iterator& iter, std::string::iterator& eof)
{
    // Stepping forward from the end iterator would be undefined behaviour.
    if (iter == eof)
        return false;

    const std::string::iterator next = iter + 1;
    return next != eof && *next == ch;
}
// Lexes one operator starting at `iter`.
//
// iter - current position; on success it is advanced past the whole operator
//        (one or two characters), on failure it is left unchanged
// eof  - end of the input
//
// Returns the operator token, or { "", TK_INVALID } when the input at `iter`
// is not an operator (including when `iter` is already at `eof`).
// Two-character operators (++ -- >= >> <= << == !=) take priority over their
// one-character prefixes. A lone '!' has no token type and is not matched.
Token getOperator(std::string::iterator& iter, std::string::iterator& eof)
{
    if (iter == eof)
        return { "", TK_INVALID };

    const std::string::iterator start = iter;
    const char first = *iter;
    ++iter; // iter now sits on the candidate second character

    // Consumes the character at `iter` only when it equals `expected`.
    const auto accept = [&iter, &eof](char expected) -> bool {
        if (iter != eof && *iter == expected)
        {
            ++iter;
            return true;
        }
        return false;
    };

    switch (first)
    {
    case '+':
        if (accept('+')) return { "++", TK_INC };
        return { "+", TK_PLUS };
    case '-':
        if (accept('-')) return { "--", TK_DEC };
        return { "-", TK_MIN };
    case '*':
        return { "*", TK_ASTERISK };
    case '/':
        return { "/", TK_SLASH };
    case '>':
        if (accept('=')) return { ">=", TK_GE };
        if (accept('>')) return { ">>", TK_RSH };
        return { ">", TK_GT };
    case '<':
        if (accept('=')) return { "<=", TK_LE };
        if (accept('<')) return { "<<", TK_LSH };
        return { "<", TK_LT };
    case '=':
        if (accept('=')) return { "==", TK_EQ };
        return { "=", TK_ASSIGN };
    case '!':
        if (accept('=')) return { "!=", TK_NE };
        break; // '!' on its own is not a known operator
    default:
        break;
    }

    // Not an operator: undo the consumption of the first character.
    iter = start;
    return { "", TK_INVALID };
}
// Checks whether the input at `b` is exactly the keyword `a`.
//
// a - NUL-terminated keyword text
// b - current input position; advanced past the keyword on success and left
//     unchanged on failure
//
// Returns true only when every character of `a` is present at `b` AND the
// character that follows is not a letter, digit or underscore, so that
// "iffy" or "format" are not mistaken for "if" / "for".
//
// NOTE: no end iterator is available here, so the scan relies on the '\0'
// that std::string stores after its last character to detect end of input.
// Reading it through the end iterator is not formally guaranteed by the
// standard; it never reads beyond that terminator.
bool compareKeyword(const char* a, std::string::iterator& b)
{
    std::string::iterator cursor = b;
    for (; *a != '\0'; ++a, ++cursor)
    {
        // At end of input *cursor is '\0', which differs from *a, so a
        // truncated keyword is rejected before stepping any further.
        if (*cursor != *a)
            return false;
    }

    // Require a word boundary after the keyword.
    const unsigned char following = static_cast<unsigned char>(*cursor);
    if (std::isalnum(following) || following == '_')
        return false;

    b = cursor;
    return true;
}
// Lexes one keyword ("if", "else", "for", "while", "do") starting at `iter`.
//
// iter - current input position; advanced past the keyword on success and
//        restored to its original value on failure
//
// Returns the keyword token, or { "", TK_INVALID } when no keyword starts at
// `iter`. The token type is TK_IF + the keyword's index in the table below,
// so the table order must mirror the TK_IF..TK_DO enumerators.
//
// A match is accepted only when the full keyword was consumed and it is not
// followed by a letter, digit or underscore; this is checked here as well so
// the result does not depend on how strict compareKeyword() is.
//
// NOTE: no end iterator is available, so the boundary check reads the
// character after the keyword and relies on std::string's trailing '\0' when
// the keyword is the last thing in the input.
Token getKeyword(std::string::iterator& iter)
{
    static const char* const keywords[] = {
        "if", "else", "for", "while", "do"
    };

    const std::string::iterator start = iter;
    int index = 0;
    for (const char* keyword : keywords)
    {
        iter = start;
        if (compareKeyword(keyword, iter))
        {
            const std::size_t consumed = static_cast<std::size_t>(iter - start);
            if (consumed == std::char_traits<char>::length(keyword))
            {
                const unsigned char following = static_cast<unsigned char>(*iter);
                if (!std::isalnum(following) && following != '_')
                    return { keyword, static_cast<TokenType>(TK_IF + index) };
            }
        }
        ++index;
    }

    iter = start;
    return { "", TK_INVALID };
}
// Lexes one identifier starting at `iter`.
//
// iter - current position; advanced past the identifier on success and left
//        unchanged on failure
// eof  - end of the input
//
// An identifier is a letter or underscore followed by any number of letters,
// digits or underscores. Returns { text, TK_IDENTIFIER } on success and
// { "", TK_INVALID } otherwise (including when `iter` is at `eof`).
// Input that starts with a digit is never consumed here, so that the number
// lexer still gets to see it.
Token getIdentifier(std::string::iterator& iter, std::string::iterator& eof)
{
    // The <cctype> classifiers require a value representable as unsigned char.
    const auto isStart = [](char c) -> bool {
        return std::isalpha(static_cast<unsigned char>(c)) != 0 || c == '_';
    };
    const auto isPart = [](char c) -> bool {
        return std::isalnum(static_cast<unsigned char>(c)) != 0 || c == '_';
    };

    if (iter == eof || !isStart(*iter))
        return { "", TK_INVALID };

    const std::string::iterator first = iter;
    do
    {
        ++iter;
    } while (iter != eof && isPart(*iter));

    return { std::string(first, iter), TK_IDENTIFIER };
}
// Consumes a run of decimal digits starting at `iter`.
//
// iter - current position; advanced past every digit that was read
// eof  - end of the input
//
// Returns TK_INT when at least one digit was consumed, TK_INVALID otherwise
// (in which case `iter` has not moved).
TokenType isInteger(std::string::iterator& iter, std::string::iterator& eof)
{
    const std::string::iterator first = iter;
    // std::isdigit requires a value representable as unsigned char.
    while (iter != eof && std::isdigit(static_cast<unsigned char>(*iter)))
        ++iter;

    return iter != first ? TK_INT : TK_INVALID;
}
// Consumes a floating-point literal starting at `iter`.
//
// iter - current position; advanced past everything that was read. It may
//        have moved even when TK_INVALID is returned, so callers must restore
//        it themselves on failure.
// eof  - end of the input
//
// Accepted shape: digits, an optional '.' followed by more digits, and an
// optional 'f' suffix. Returns TK_FLOAT when at least one digit was seen and
// the literal has a decimal point or an 'f' suffix (e.g. "1.5", "100.", ".5",
// "1f", "1.5f"); returns TK_INVALID otherwise.
TokenType isFloat(std::string::iterator& iter, std::string::iterator& eof)
{
    int numDigits = 0;
    // std::isdigit requires a value representable as unsigned char.
    const auto eatDigits = [&iter, &eof, &numDigits]() {
        while (iter != eof && std::isdigit(static_cast<unsigned char>(*iter)))
        {
            ++iter;
            ++numDigits;
        }
    };

    eatDigits();

    bool hasPoint = false;
    if (iter != eof && *iter == '.')
    {
        hasPoint = true;
        ++iter;
        eatDigits();
    }

    bool hasSuffix = false;
    if (iter != eof && *iter == 'f')
    {
        hasSuffix = true;
        ++iter;
    }

    if (numDigits > 0 && (hasPoint || hasSuffix))
        return TK_FLOAT;
    return TK_INVALID;
}
// Lexes one numeric literal starting at `iter`.
//
// iter - current position; advanced past the literal on success and restored
//        to its original value on failure
// eof  - end of the input
//
// The float rule is tried first and the integer rule second, each from the
// same starting position. Returns { text, TK_FLOAT } or { text, TK_INT } on
// success and { "", TK_INVALID } when neither rule matches.
Token getNumber(std::string::iterator& iter, std::string::iterator& eof)
{
    const std::string::iterator start = iter;

    TokenType kind = isFloat(iter, eof);
    if (kind == TK_INVALID)
    {
        // isFloat may have moved the cursor; retry from the beginning.
        iter = start;
        kind = isInteger(iter, eof);
    }

    if (kind == TK_INVALID)
    {
        iter = start;
        return { "", TK_INVALID };
    }

    return { std::string(start, iter), kind };
}
// Advances `iter` past every consecutive whitespace character.
//
// iter - current position; left on the first non-whitespace character, or on
//        `eof` when only whitespace remained
// eof  - end of the input; never dereferenced
void skipWhitespace(std::string::iterator& iter, std::string::iterator& eof)
{
    // std::isspace requires a value representable as unsigned char.
    while (iter != eof && std::isspace(static_cast<unsigned char>(*iter)))
        ++iter;
}
// Skips one comment starting at `iter`, if there is one.
//
// iter - current position; advanced past the comment when one is found and
//        left unchanged otherwise
// eof  - end of the input
//
// Recognizes line comments ("//" up to and including the next newline) and
// block comments ("/*" up to and including the next "*/"). An unterminated
// comment runs to the end of the input. Inside a block comment a NUL byte is
// treated as the end of the input as well.
//
// Returns true when a comment was skipped, false otherwise.
bool skipComments(std::string::iterator& iter, std::string::iterator& eof)
{
    if (iter == eof || *iter != '/')
        return false;

    const std::string::iterator second = iter + 1;
    if (second == eof)
        return false;

    if (*second == '/')
    {
        iter = second + 1;
        // Consume everything up to and including the newline.
        while (iter != eof)
        {
            const char c = *iter;
            ++iter;
            if (c == '\n')
                break;
        }
        return true;
    }

    if (*second == '*')
    {
        // Start scanning AFTER the opener so that the '*' of "/*" cannot
        // pair with a following '/' ("/*/" is not a complete comment).
        iter = second + 1;
        bool star = false; // was the previous character a '*'?
        while (iter != eof)
        {
            const char c = *iter;
            ++iter;
            if (c == '\0' || (c == '/' && star))
                break;
            star = (c == '*');
        }
        return true;
    }

    return false;
}
std::vector<Token> generateTokens(std::string& code)
{
std::vector<Token> tokens;
std::string::iterator iter = code.begin();
std::string::iterator end = code.end();
while (iter != end)
{
Token tok;
skipWhitespace(iter, end);
if (skipComments(iter, end))
continue;
if (iter != end && (tok = getKeyword(iter)).tokenType != TK_INVALID) tokens.push_back(tok);
else if (iter != end && (tok = getIdentifier(iter, end)).tokenType != TK_INVALID) tokens.push_back(tok);
else if (iter != end && (tok = getOperator(iter, end)).tokenType != TK_INVALID) tokens.push_back(tok);
else if (iter != end && (tok = getNumber(iter, end)).tokenType != TK_INVALID) tokens.push_back(tok);
else
{
tokens.push_back({ "", TK_INVALID });
}
}
return tokens;
}
// TESTBENCH HERE
int main()
{
std::string buf;
std::vector<Token> tokens;
buf = "100.+100+a";
tokens = generateTokens(buf);
//while (true)
//{
// std::cout << ">";
// std::getline(std::cin, buf);
// tokens = generateTokens(buf);
//
for (auto& i : tokens)
{
std::cout << i.str << ", " << i.tokenType << std::endl;
}
//
// std::cout << buf << std::endl;
//}
std::cin.get();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment