Last active
December 16, 2015 00:09
-
-
Save tomthorogood/5345311 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
// Sentinel pushed onto the token stream to announce a packed string. It is
// followed by the string's byte length and then ceil(length / 4) payload words.
#define PACKING_DELIMITER 999999

// Splits a space-separated input line into an int-only token stream and prints
// both the raw stream and its decoded form to std::cout.
//
// Token kinds:
//   * A token starting with a decimal digit is stored as its integer value
//     (leading digits only; the rest of the token up to the next space is
//     skipped, matching atoi semantics).
//   * A token starting with '"' runs through the closing '"' (quotes are kept
//     as part of the token and may enclose spaces). If the closing quote is
//     missing, the token runs to the end of the input.
//   * Any other token runs up to the next space or the end of the input.
//   String tokens are stored as PACKING_DELIMITER, byte length, then the bytes
//   packed four per int, least-significant byte first.
//
// Runs of spaces between tokens are skipped. A null `input` is treated as an
// empty string.
//
// Known limitation: a numeric token whose value equals PACKING_DELIMITER is
// indistinguishable from the sentinel when the stream is decoded; the decoder
// only falls back to printing it as a number when the words that follow cannot
// form a valid length + payload.
void tokenize(const char* input)
{
    std::vector<int> tokens; //debug
    const char* cursor = input ? input : "";

    for (;;)
    {
        // Skip separators so we never emit empty tokens or step past the
        // terminating NUL.
        while (*cursor == ' ')
            ++cursor;
        if (*cursor == '\0')
            break;

        // isdigit requires a value representable as unsigned char; plain char
        // may be negative for bytes >= 0x80.
        if (std::isdigit(static_cast<unsigned char>(*cursor)))
        {
            // strtol stops at the first non-digit, so no temporary copy of the
            // token is needed.
            tokens.push_back(static_cast<int>(std::strtol(cursor, NULL, 10)));
            const char* nextSpace = std::strchr(cursor, ' ');
            cursor = nextSpace ? nextSpace : cursor + std::strlen(cursor);
            continue;
        }

        // String token: find the half-open byte range [cursor, stop).
        const char* stop;
        if (*cursor == '"')
        {
            const char* closingQuote = std::strchr(cursor + 1, '"');
            stop = closingQuote ? closingQuote + 1 : cursor + std::strlen(cursor);
        }
        else
        {
            const char* nextSpace = std::strchr(cursor, ' ');
            stop = nextSpace ? nextSpace : cursor + std::strlen(cursor);
        }
        const std::size_t length = static_cast<std::size_t>(stop - cursor);

        tokens.push_back(PACKING_DELIMITER);
        tokens.push_back(static_cast<int>(length));

        // Pack bytes little-endian, four per word. Each byte goes through
        // unsigned char first so bytes >= 0x80 are not sign-extended over the
        // neighbouring bytes of the word.
        unsigned int packedWord = 0;
        for (std::size_t b = 0; b < length; ++b)
        {
            const unsigned int byteValue = static_cast<unsigned char>(cursor[b]);
            packedWord |= byteValue << (8 * (b % 4));
            if (b % 4 == 3)
            {
                tokens.push_back(static_cast<int>(packedWord));
                packedWord = 0;
            }
        }
        // Flush a partially filled trailing word.
        if (length % 4 != 0)
            tokens.push_back(static_cast<int>(packedWord));

        cursor = stop;
    }

    //debug & testing
    std::cout << '\n' << "===INTERNAL TOKENS===" << '\n';
    for (std::size_t i = 0; i < tokens.size(); ++i)
        std::cout << tokens[i] << '\n';

    std::cout << '\n' << "===INTERPRETED TOKENS===" << '\n';
    std::size_t i = 0;
    while (i < tokens.size())
    {
        if (tokens[i] == PACKING_DELIMITER
            && i + 1 < tokens.size()
            && tokens[i + 1] >= 0)
        {
            const std::size_t length = static_cast<std::size_t>(tokens[i + 1]);
            const std::size_t wordCount = (length + 3) / 4;
            // Only decode when the whole payload is actually present.
            if (wordCount <= tokens.size() - (i + 2))
            {
                for (std::size_t b = 0; b < wordCount * 4; ++b)
                {
                    const unsigned int word =
                        static_cast<unsigned int>(tokens[i + 2 + b / 4]);
                    const char unpackedChar =
                        static_cast<char>((word >> (8 * (b % 4))) & 0xFFu);
                    // Zero bytes are padding in the final word.
                    if (unpackedChar != 0)
                        std::cout << unpackedChar;
                }
                std::cout << '\n';
                i += 2 + wordCount;
                continue;
            }
        }
        std::cout << tokens[i] << '\n';
        ++i;
    }
    std::cout.flush();
}
int main() | |
{ | |
const char test1[] = "42 h✓y hቘha \"this is a string literal\" 56 1 92 5291"; | |
std::cout << "INPUT STRING: " << test1 << std::endl; | |
tokenize(test1); | |
return 0; | |
//output below: | |
/* | |
INPUT STRING: 42 h✓y hቘha "this is a string literal" 56 1 92 5291 | |
===INTERNAL TOKENS=== | |
42 | |
999999 | |
5 | |
-7576 | |
121 | |
999999 | |
6 | |
-7832 | |
24936 | |
999999 | |
26 | |
1768453154 | |
1936269427 | |
1931501856 | |
1852404340 | |
1768693863 | |
1634887028 | |
8812 | |
56 | |
1 | |
92 | |
5291 | |
===INTERPRETED TOKENS=== | |
42 | |
h���y | |
h���ha | |
"this is a string literal" | |
56 | |
1 | |
92 | |
5291 | |
*/ | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment