Last active
November 25, 2019 09:19
-
-
Save milesrout/8128070 to your computer and use it in GitHub Desktop.
Officially The Messiest Code Ever
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream>
#include <list>
#include <map>
#include <string>
#include <utility>

using namespace std;
// Lexical categories produced by the tokeniser.
enum struct token_type {
    ID,     // identifier
    HEX,    // hexadecimal integer
    DEC,    // decimal integer
    OCT,    // octal integer
    BIN,    // binary integer
    COMMA,  // comma
    DOT,    // full stop/dot/period
    COLON,  // colon
    SEMI,   // semi-colon
    STRING, // double-quote delimited string
    NL,     // newline
};
struct token { | |
token_type type; | |
string value; | |
token(token_type type, string value) : type(type), value(value) {} | |
}; | |
// Instruction mnemonics, grouped one related family per line.
// Enumerator order fixes the underlying values (SET == 0 ... IFU == 22).
enum struct instr_type {
    SET,
    ADD, SUB,
    MUL, MLI,
    DIV, DVI,
    MOD, MDI,
    AND, BOR, XOR,
    SHR, ASR, SHL,
    IFB, IFC,
    IFE, IFN,
    IFG, IFA,
    IFL, IFU,
};
// Operand kinds. The hex ranges in each comment are the operand codes the
// kind corresponds to, as recorded by the original author.
enum struct value_type {
    REG, // register - 00-07 REGISTER, 1B SP, 1C PC, 1D EX
    LIT, // literal - 1F NEXT WORD and 20-3F LITERAL VALUE.
    STK, // stack - 18 PUSH/POP, 19 PEEK, 1A PICK
    RVL, // register value - 08-0F [REGISTER] if value is 0, 10-17 [REGISTER + NEXT WORD] otherwise
    NVL, // next value - 1E [NEXT WORD]
};
struct value { | |
value_type type; | |
int value; | |
}; | |
struct instruction { | |
instr_type type; | |
list<value> values; | |
}; | |
list<token> | |
tokenise(string::iterator begin, string::iterator end) | |
{ | |
list<token> tokens; | |
string::iterator iter = begin; | |
while (iter != end) { | |
cout << *iter; | |
if (*iter == ',') { | |
tokens.emplace_back(token_type::COMMA, ","); | |
++iter; | |
} | |
else if (*iter == '.') { | |
tokens.emplace_back(token_type::DOT, "."); | |
++iter; | |
} | |
else if (*iter == '\n') { | |
tokens.emplace_back(token_type::NL, "\n"); | |
++iter; | |
} | |
else if (*iter == ':') { | |
tokens.emplace_back(token_type::COLON, ":"); | |
++iter; | |
} | |
else if (*iter == ';') { | |
tokens.emplace_back(token_type::SEMI, ";"); | |
++iter; | |
} | |
else if (*iter == '"') { | |
string str = ""; | |
++iter; | |
while (iter != end && *iter != '"') { | |
str.push_back(*iter); | |
++iter; | |
} | |
// Skip final quotation mark | |
if (iter != end) ++iter; | |
tokens.emplace_back(token_type::STRING, str); | |
continue; | |
} | |
else if (*iter == ' ') { | |
++iter; | |
} | |
else if (*iter == '0') { | |
cout << *iter << "advancing next character" << endl; | |
++iter; | |
cout << *iter << "advanced to next character" << endl; | |
if (iter == end || *iter == ' ') { | |
cout << *iter << "end or space" << endl; | |
tokens.emplace_back(token_type::DEC, "0"); | |
} | |
else if (*iter == 'o' || *iter >= '0' && *iter <= '7') { | |
// octal | |
string tok; | |
// skip the octal designator | |
if (*iter == 'o') ++iter; | |
while (iter != end && *iter != ' ') { | |
tok.push_back(*iter); | |
++iter; | |
} | |
tokens.emplace_back(token_type::OCT, tok); | |
} | |
else if (*iter == 'x') { | |
// hexidecimal | |
string tok; | |
// skip the hex designator | |
++iter; | |
while (iter != end && *iter != ' ') { | |
tok.push_back(*iter); | |
++iter; | |
} | |
tokens.emplace_back(token_type::HEX, tok); | |
} | |
else if (*iter == 'b') { | |
// binary | |
string tok; | |
// skip the binary designator | |
++iter; | |
while (iter != end && *iter != ' ') { | |
tok.push_back(*iter); | |
++iter; | |
} | |
tokens.emplace_back(token_type::BIN, tok); | |
} | |
continue; | |
} | |
else { | |
// ident | decimal | |
if (*iter >= '0' && *iter <= '9') { | |
// decimal | |
string tok; | |
while (iter != end && *iter != ' ') { | |
tok.push_back(*iter); | |
++iter; | |
} | |
tokens.emplace_back(token_type::DEC, tok); | |
} | |
else if (*iter == '_' || | |
*iter >= 'A' && *iter <= 'Z' || | |
*iter >= 'a' && *iter <= 'z') { | |
// identifier | |
string tok; | |
while (iter != end && | |
(*iter == '_' || | |
*iter >= 'A' && *iter <= 'Z' || | |
*iter >= 'a' && *iter <= 'z')) { | |
tok.push_back(*iter); | |
++iter; | |
} | |
tokens.emplace_back(token_type::ID, tok); | |
} | |
} | |
} // end while-loop | |
return tokens; | |
} // end function | |
int main() | |
{ | |
// string str = "0123 \"0b123\" 0x123"; | |
string str = "set pc, 0x123"; | |
map<token_type, string> tt_index; | |
tt_index[token_type::ID] = "identifier"; | |
tt_index[token_type::HEX] = "hexidecimal integer"; | |
tt_index[token_type::DEC] = "decimal integer"; | |
tt_index[token_type::OCT] = "octal integer"; | |
tt_index[token_type::BIN] = "binary integer"; | |
tt_index[token_type::COMMA] = "comma"; | |
tt_index[token_type::DOT] = "full stop/dot/period"; | |
tt_index[token_type::COLON] = "colon"; | |
tt_index[token_type::SEMI] = "semi-colon"; | |
tt_index[token_type::STRING] = "double-quote delimited string"; | |
tt_index[token_type::NL] = "newline"; | |
list<token> tokens = tokenise(begin(str), end(str)); | |
cout << endl << endl << "==PRINTING NEXT TOKEN==" << endl; | |
for (token& tok : tokens) { | |
cout << "[" << tt_index[tok.type] << ":" << tok.value << ']' << endl; | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
There's a 120-line function.