VerTiGoEtrex · June 26, 2015 21:25
diff --git a/RecruiterTestGen.cpp b/RecruiterTestGen.cpp
 #include <chrono>
 #include <cstdio>
 #include <iostream>
 #include <random>
 #include <sstream>
 #include <string>

 using std::string;
 using std::ostringstream;

 // Number of independently randomized encodings of toEncode that main() emits.
 const int arraySize = 10;
 // Size of the input alphabet: getUnicodeEncoded() maps '\n' to 0 and the byte
 // values 32..127 to 1..96, giving 97 distinct symbols.
 const int charSpace = 97;
 // NOTE(review): not referenced by the visible code; getUnicodeEncoded()
 // hard-codes the same value 31 when normalizing its input.
 const int offset = 31;
 const int lowerboundCodepoint = 12353; // U+3041, Japanese hiragana beginning
 const int upperboundCodepoint = 12543; // U+30FF, Japanese katakana ending
 // Plain text that main() encodes character by character.
 auto toEncode = string{"EncryptionTeXt!\n"};

 // Randomness: the engine is seeded from the system clock's tick count, so the
 // generated output is expected to differ between runs.
 std::default_random_engine generator(std::chrono::system_clock::now().time_since_epoch().count());
 // Draws a region index k; getUnicodeEncoded() turns it into the candidate code
 // point k * charSpace + symbol and rejects candidates outside the bounds above.
 std::uniform_int_distribution<int> distribution(lowerboundCodepoint / charSpace, upperboundCodepoint / charSpace); // original author's note: if charSpace divides upperboundCodepoint, the last region is (nearly) useless -- accepted as harmless
 // The code point range must be wide enough to hold every symbol at least once.
 static_assert(upperboundCodepoint - lowerboundCodepoint >= charSpace, "Not enough room to encode character space one-to-one");

 //http://dev.networkerror.org/utf8/?start=12500&end=12600&cols=4&show_uni_int=on&show_uni_hex=on&show_html_ent=on&show_raw_hex=on&show_raw_bin=on

 // Encodes one Unicode code point as a UTF-8 byte sequence.
 //
 // cp:      the code point to encode.
 // returns: the 1-4 byte UTF-8 encoding of cp, or an empty string when cp is
 //          not a Unicode scalar value (negative, a UTF-16 surrogate in
 //          U+D800..U+DFFF, or greater than U+10FFFF).
 string utf8chr(int cp) {
   // Reject everything that has no UTF-8 encoding up front. Negative values
   // must be caught here; otherwise they would fall into the one-byte branch
   // and produce a garbage byte.
   const bool isSurrogate = (0xD800 <= cp && cp <= 0xDFFF);
   if (cp < 0 || cp > 0x10FFFF || isSurrogate)
     return string{};

   char bytes[4];
   string::size_type length = 0;
   if (cp <= 0x7F) {
     bytes[length++] = static_cast<char>(cp);
   } else if (cp <= 0x7FF) {
     bytes[length++] = static_cast<char>(0xC0 | (cp >> 6));
     bytes[length++] = static_cast<char>(0x80 | (cp & 0x3F));
   } else if (cp <= 0xFFFF) {
     bytes[length++] = static_cast<char>(0xE0 | (cp >> 12));
     bytes[length++] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
     bytes[length++] = static_cast<char>(0x80 | (cp & 0x3F));
   } else {
     bytes[length++] = static_cast<char>(0xF0 | (cp >> 18));
     bytes[length++] = static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
     bytes[length++] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
     bytes[length++] = static_cast<char>(0x80 | (cp & 0x3F));
   }
   // Construct with an explicit length so that U+0000 yields a single NUL byte
   // instead of being truncated to an empty string (which means "invalid").
   return string(bytes, length);
 }

 // Maps one input character onto a randomly chosen code point inside
 // [lowerboundCodepoint, upperboundCodepoint] and returns it UTF-8 encoded.
 //
 // c:       a newline or a character with value 32..127.
 // returns: the UTF-8 bytes of the chosen code point.
 // throws:  std::string when c is outside the accepted input range.
 string getUnicodeEncoded(char c) {
   const bool isNewline = (c == '\n');
   if (!isNewline && (c < 32 || c > 127)) {
     throw string{"Ordinal not in encoding range! Must be text or newline."};
   }

   // Newline takes slot 0; every other accepted character lands on value - 31.
   const char slot = isNewline ? static_cast<char>(0) : static_cast<char>(c - 31);

   // Rejection sampling: keep drawing regions until the candidate falls inside
   // the permitted code point range and encodes to a non-empty sequence.
   for (;;) {
     const int candidate = distribution(generator) * charSpace + slot;
     const bool inRange =
         candidate >= lowerboundCodepoint && candidate <= upperboundCodepoint;
     if (inRange) {
       string glyph = utf8chr(candidate);
       if (!glyph.empty()) {
         return glyph;
       }
     }
   }
 }

 // Prints C source text: a declaration of an array of arraySize strings,
 // followed by one assignment per element, each holding a freshly randomized
 // encoding of toEncode.
 int main() {
   // printf is nicer than cout -- sorry!
   printf("const char* encoded[%d];\n", arraySize);

   int slotIndex = 0;
   while (slotIndex < arraySize) {
     // Encode the plain text one character at a time.
     std::string line;
     for (const char plain : toEncode) {
       line += getUnicodeEncoded(plain);
     }
     printf("encoded[%d] = \"%s\";\n", slotIndex, line.c_str());
     ++slotIndex;
   }
 }
	#include <iostream>
	#include <sstream>
	#include <string>
	#include <random>

	using std::string;
	using std::ostringstream;

	// Number of independently randomized encodings of toEncode that main() emits.
	const int arraySize = 10;
	// Size of the input alphabet: getUnicodeEncoded() maps '\n' to 0 and the byte
	// values 32..127 to 1..96, giving 97 distinct symbols.
	const int charSpace = 97;
	// NOTE(review): not referenced by the visible code; getUnicodeEncoded()
	// hard-codes the same value 31 when normalizing its input.
	const int offset = 31;
	const int lowerboundCodepoint = 12353; // U+3041, Japanese hiragana beginning
	const int upperboundCodepoint = 12543; // U+30FF, Japanese katakana ending
	// Plain text that main() encodes character by character.
	auto toEncode = string{"EncryptionTeXt!\n"};

	// Randomness: the engine is seeded from the system clock's tick count, so the
	// generated output is expected to differ between runs.
	std::default_random_engine generator(std::chrono::system_clock::now().time_since_epoch().count());
	// Draws a region index k; getUnicodeEncoded() turns it into the candidate code
	// point k * charSpace + symbol and rejects candidates outside the bounds above.
	std::uniform_int_distribution<int> distribution(lowerboundCodepoint / charSpace, upperboundCodepoint / charSpace); // original author's note: if charSpace divides upperboundCodepoint, the last region is (nearly) useless -- accepted as harmless
	// The code point range must be wide enough to hold every symbol at least once.
	static_assert(upperboundCodepoint - lowerboundCodepoint >= charSpace, "Not enough room to encode character space one-to-one");

	//http://dev.networkerror.org/utf8/?start=12500&end=12600&cols=4&show_uni_int=on&show_uni_hex=on&show_html_ent=on&show_raw_hex=on&show_raw_bin=on

	// Encodes one Unicode code point as a UTF-8 byte sequence.
	//
	// cp:      the code point to encode.
	// returns: the 1-4 byte UTF-8 encoding of cp, or an empty string when cp is
	//          not a Unicode scalar value (negative, a UTF-16 surrogate in
	//          U+D800..U+DFFF, or greater than U+10FFFF).
	string utf8chr(int cp) {
	  // Reject everything that has no UTF-8 encoding up front. Negative values
	  // must be caught here; otherwise they would fall into the one-byte branch
	  // and produce a garbage byte.
	  const bool isSurrogate = (0xD800 <= cp && cp <= 0xDFFF);
	  if (cp < 0 || cp > 0x10FFFF || isSurrogate)
	    return string{};

	  char bytes[4];
	  string::size_type length = 0;
	  if (cp <= 0x7F) {
	    bytes[length++] = static_cast<char>(cp);
	  } else if (cp <= 0x7FF) {
	    bytes[length++] = static_cast<char>(0xC0 | (cp >> 6));
	    bytes[length++] = static_cast<char>(0x80 | (cp & 0x3F));
	  } else if (cp <= 0xFFFF) {
	    bytes[length++] = static_cast<char>(0xE0 | (cp >> 12));
	    bytes[length++] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
	    bytes[length++] = static_cast<char>(0x80 | (cp & 0x3F));
	  } else {
	    bytes[length++] = static_cast<char>(0xF0 | (cp >> 18));
	    bytes[length++] = static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
	    bytes[length++] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
	    bytes[length++] = static_cast<char>(0x80 | (cp & 0x3F));
	  }
	  // Construct with an explicit length so that U+0000 yields a single NUL byte
	  // instead of being truncated to an empty string (which means "invalid").
	  return string(bytes, length);
	}

	// Maps one input character onto a randomly chosen code point inside
	// [lowerboundCodepoint, upperboundCodepoint] and returns it UTF-8 encoded.
	//
	// c:       a newline or a character with value 32..127.
	// returns: the UTF-8 bytes of the chosen code point.
	// throws:  std::string when c is outside the accepted input range.
	string getUnicodeEncoded(char c) {
	  if (c != '\n' && (c < 32 || c > 127))
	    throw string{"Ordinal not in encoding range! Must be text or newline."};
	  if (c == '\n')
	    c = 31; // move newline onto the slot directly below the space character
	  c -= 31; // normalize so newline is 0 and the other inputs are 1..96

	  // Rejection sampling: draw a region, build the candidate code point, and
	  // retry until it lies inside the permitted range and encodes successfully.
	  string encodedChar;
	  while (encodedChar.empty()) {
	    const int codepoint = distribution(generator) * charSpace + c;
	    if (codepoint < lowerboundCodepoint || codepoint > upperboundCodepoint)
	      continue;
	    encodedChar = utf8chr(codepoint);
	  }
	  return encodedChar;
	}

	// Prints C source text: a declaration of an array of arraySize strings,
	// followed by one assignment per element, each holding a freshly randomized
	// encoding of toEncode.
	int main() {
	  // printf is nicer than cout -- sorry!
	  printf("const char* encoded[%d];\n", arraySize);

	  int slotIndex = 0;
	  while (slotIndex < arraySize) {
	    // Encode the plain text one character at a time.
	    std::string line;
	    for (const char plain : toEncode) {
	      line += getUnicodeEncoded(plain);
	    }
	    printf("encoded[%d] = \"%s\";\n", slotIndex, line.c_str());
	    ++slotIndex;
	  }
	}
No results found