Skip to content

Instantly share code, notes, and snippets.

@milesrout
Last active November 25, 2019 09:19
Show Gist options
  • Save milesrout/8128070 to your computer and use it in GitHub Desktop.
Save milesrout/8128070 to your computer and use it in GitHub Desktop.
Officially The Messiest Code Ever
#include <iostream>
#include <list>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
using namespace std;
// Lexical categories a token can belong to.
enum class token_type {
    ID,      // identifier
    HEX,     // hexadecimal integer literal
    DEC,     // decimal integer literal
    OCT,     // octal integer literal
    BIN,     // binary integer literal
    COMMA,   // ','
    DOT,     // '.' (full stop / period)
    COLON,   // ':'
    SEMI,    // ';'
    STRING,  // string delimited by double quotes
    NL,      // line break ('\n')
};
// A single lexical token: its category together with the text it carries.
struct token {
    token_type type;  // which kind of token this is
    string value;     // the token's text as stored by the lexer

    // `value` is taken by value and moved into the member, so passing a
    // temporary or an explicitly moved string costs no extra copy.
    token(token_type type, string value) : type(type), value(std::move(value)) {}
};
// Instruction mnemonics. The enumerators are numbered by position only;
// nothing in this file maps them to machine opcode values.
enum class instr_type {
    SET,
    ADD,
    SUB,
    MUL,
    MLI,
    DIV,
    DVI,
    MOD,
    MDI,
    AND,
    BOR,
    XOR,
    SHR,
    ASR,
    SHL,
    IFB,
    IFC,
    IFE,
    IFN,
    IFG,
    IFA,
    IFL,
    IFU,
};
// Kinds of instruction operand. The two-digit codes quoted below are the
// operand encodings each kind stands for.
enum class value_type {
    // Register: 00-07 REGISTER, 1B SP, 1C PC, 1D EX.
    REG,
    // Literal: 1F NEXT WORD and 20-3F LITERAL VALUE.
    LIT,
    // Stack: 18 PUSH/POP, 19 PEEK, 1A PICK.
    STK,
    // Register value: 08-0F [REGISTER] if value is 0,
    // 10-17 [REGISTER + NEXT WORD] otherwise.
    RVL,
    // Next value: 1E [NEXT WORD].
    NVL,
};
// One instruction operand: its kind plus an integer payload.
//
// NOTE: the data member deliberately shares the struct's name. C++ only
// permits that while the class has no user-declared constructor, so keep
// this a plain aggregate.
struct value {
    // Operand kind.
    value_type type;
    // Integer payload; presumably interpreted according to `type`
    // (see the value_type comments) -- confirm against the code that uses it.
    int value;
};
// An instruction: a mnemonic together with its list of operands.
struct instruction {
    // Which operation this is.
    instr_type type;
    // The operands, in list order.
    std::list<value> values;
};
list<token>
tokenise(string::iterator begin, string::iterator end)
{
list<token> tokens;
string::iterator iter = begin;
while (iter != end) {
cout << *iter;
if (*iter == ',') {
tokens.emplace_back(token_type::COMMA, ",");
++iter;
}
else if (*iter == '.') {
tokens.emplace_back(token_type::DOT, ".");
++iter;
}
else if (*iter == '\n') {
tokens.emplace_back(token_type::NL, "\n");
++iter;
}
else if (*iter == ':') {
tokens.emplace_back(token_type::COLON, ":");
++iter;
}
else if (*iter == ';') {
tokens.emplace_back(token_type::SEMI, ";");
++iter;
}
else if (*iter == '"') {
string str = "";
++iter;
while (iter != end && *iter != '"') {
str.push_back(*iter);
++iter;
}
// Skip final quotation mark
if (iter != end) ++iter;
tokens.emplace_back(token_type::STRING, str);
continue;
}
else if (*iter == ' ') {
++iter;
}
else if (*iter == '0') {
cout << *iter << "advancing next character" << endl;
++iter;
cout << *iter << "advanced to next character" << endl;
if (iter == end || *iter == ' ') {
cout << *iter << "end or space" << endl;
tokens.emplace_back(token_type::DEC, "0");
}
else if (*iter == 'o' || *iter >= '0' && *iter <= '7') {
// octal
string tok;
// skip the octal designator
if (*iter == 'o') ++iter;
while (iter != end && *iter != ' ') {
tok.push_back(*iter);
++iter;
}
tokens.emplace_back(token_type::OCT, tok);
}
else if (*iter == 'x') {
// hexidecimal
string tok;
// skip the hex designator
++iter;
while (iter != end && *iter != ' ') {
tok.push_back(*iter);
++iter;
}
tokens.emplace_back(token_type::HEX, tok);
}
else if (*iter == 'b') {
// binary
string tok;
// skip the binary designator
++iter;
while (iter != end && *iter != ' ') {
tok.push_back(*iter);
++iter;
}
tokens.emplace_back(token_type::BIN, tok);
}
continue;
}
else {
// ident | decimal
if (*iter >= '0' && *iter <= '9') {
// decimal
string tok;
while (iter != end && *iter != ' ') {
tok.push_back(*iter);
++iter;
}
tokens.emplace_back(token_type::DEC, tok);
}
else if (*iter == '_' ||
*iter >= 'A' && *iter <= 'Z' ||
*iter >= 'a' && *iter <= 'z') {
// identifier
string tok;
while (iter != end &&
(*iter == '_' ||
*iter >= 'A' && *iter <= 'Z' ||
*iter >= 'a' && *iter <= 'z')) {
tok.push_back(*iter);
++iter;
}
tokens.emplace_back(token_type::ID, tok);
}
}
} // end while-loop
return tokens;
} // end function
int main()
{
// string str = "0123 \"0b123\" 0x123";
string str = "set pc, 0x123";
map<token_type, string> tt_index;
tt_index[token_type::ID] = "identifier";
tt_index[token_type::HEX] = "hexidecimal integer";
tt_index[token_type::DEC] = "decimal integer";
tt_index[token_type::OCT] = "octal integer";
tt_index[token_type::BIN] = "binary integer";
tt_index[token_type::COMMA] = "comma";
tt_index[token_type::DOT] = "full stop/dot/period";
tt_index[token_type::COLON] = "colon";
tt_index[token_type::SEMI] = "semi-colon";
tt_index[token_type::STRING] = "double-quote delimited string";
tt_index[token_type::NL] = "newline";
list<token> tokens = tokenise(begin(str), end(str));
cout << endl << endl << "==PRINTING NEXT TOKEN==" << endl;
for (token& tok : tokens) {
cout << "[" << tt_index[tok.type] << ":" << tok.value << ']' << endl;
}
return 0;
}
@Brianzhengca
Copy link

how is this messy !?

@milesrout
Copy link
Author

There's a 120-line function.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment