Skip to content

Instantly share code, notes, and snippets.

@m0r13
Last active August 29, 2015 14:06
Show Gist options
  • Select an option

  • Save m0r13/89638b6e2421a1929b97 to your computer and use it in GitHub Desktop.

Select an option

Save m0r13/89638b6e2421a1929b97 to your computer and use it in GitHub Desktop.
#include <cctype>
#include <iostream>
#include <sstream>
#include <string>
/**
 * Converts one escape of the form "\\uXXXX" into the UTF-8 encoding of the
 * number XXXX.
 *
 * The first two characters of @p literal are treated as the escape prefix and
 * dropped without being inspected; the remainder is read as a hexadecimal
 * number (any number of digits; parsing stops at the first non-hex character).
 *
 * Values up to 0x7fffffff are encoded, using the extended 5- and 6-byte
 * sequence forms for anything above 0x1fffff. No check is made that the value
 * is a valid Unicode scalar value: surrogates and values above 0x10ffff are
 * encoded like any other number.
 *
 * As a debugging aid, every successfully parsed value is written to std::cout.
 *
 * @param literal escape text including its two-character prefix
 * @return the encoded bytes, or an empty string if @p literal has nothing
 *         after the prefix, cannot be parsed as hexadecimal, or exceeds
 *         0x7fffffff
 */
std::string convertUnicodeLiteral(std::string literal) {
    // TODO: verify that the prefix really is "\\u" (startswith check).
    // Guard first: erasing/substr-ing two characters from a shorter string
    // would throw std::out_of_range.
    if (literal.size() <= 2)
        return std::string();
    literal.erase(0, 2);

    // A failed extraction would leave ord at 0 and make garbage input look
    // like U+0000, so the stream state is checked explicitly.
    std::istringstream digits(literal);
    unsigned int ord = 0;
    if (!(digits >> std::hex >> ord))
        return std::string();
    std::cout << "ord " << literal << " = " << ord << std::endl;

    // Single-byte form: the value is the byte.
    if (ord <= 0x7f)
        return std::string(1, static_cast<char>(ord));

    // Number of continuation bytes that follow the lead byte; each extra
    // byte buys 5 more payload bits.
    int trailing = 0;
    if (ord <= 0x7ff)
        trailing = 1;
    else if (ord <= 0xffff)
        trailing = 2;
    else if (ord <= 0x1fffff)
        trailing = 3;
    else if (ord <= 0x3ffffff)
        trailing = 4;
    else if (ord <= 0x7fffffff)
        trailing = 5;
    else
        return std::string();

    // Lead-byte marker bits, indexed by the number of continuation bytes.
    static const unsigned int leadMarker[] = {0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

    std::string str;
    str += static_cast<char>(leadMarker[trailing] | (ord >> (6 * trailing)));
    // Continuation bytes carry 6 payload bits each, most significant first.
    for (int shift = 6 * (trailing - 1); shift >= 0; shift -= 6)
        str += static_cast<char>(0x80 | ((ord >> shift) & 0x3f));
    return str;
}
/**
 * Placeholder for replacing unicode escapes inside @p str.
 *
 * The replacement itself is not implemented yet, so the input is handed back
 * unchanged. The explicit return matters: falling off the end of a function
 * with a non-void return type is undefined behaviour.
 *
 * @param str text to process
 * @return @p str, unmodified
 */
std::string replaceUnicodeEscapes(std::string str) {
    // TODO: locate the escapes in str and substitute their UTF-8 encoding.
    return str;
}
/**
 * Prints a small report comparing an expected byte string with the result of
 * converting an escape literal.
 *
 * @param chr     expected bytes
 * @param literal escape text passed to convertUnicodeLiteral()
 *
 * Writes "ERROR!" to std::cout when the converted literal differs from
 * @p chr; nothing is returned.
 */
void compare(std::string chr, std::string literal) {
    const char* const separator = "---";

    std::cout << separator << std::endl;
    std::cout << "testing " << literal << " = " << chr << std::endl;
    std::cout << "chr len " << chr.size() << " " << chr << std::endl;

    const std::string converted = convertUnicodeLiteral(literal);
    std::cout << "literal len " << converted.size() << " " << converted << std::endl;

    const bool matches = (converted == chr);
    if (!matches)
        std::cout << "ERROR!" << std::endl;

    std::cout << separator << std::endl;
}
/**
 * Runs compare() once per test case, covering every UTF-8 sequence length
 * from one to six bytes.
 *
 * Expected values are written as explicit byte escapes rather than \u / \U
 * escapes: universal-character-names above U+10FFFF are ill-formed in
 * standard C++, and spelling out the bytes also keeps the expectations
 * independent of the compiler's execution character set.
 */
int main() {
    compare("\x24", "\\u0024");                            // U+0024, 1 byte
    compare("\xc2\xa2", "\\u00a2");                        // U+00A2, 2 bytes
    compare("\xe2\x82\xac", "\\u20ac");                    // U+20AC, 3 bytes
    compare("\xdf\xbf", "\\u7ff");                         // largest 2-byte value
    compare("\xef\xbf\xbf", "\\uffff");                    // largest 3-byte value
    compare("\xf7\xbf\xbf\xbf", "\\u1fffff");              // largest 4-byte value
    compare("\xfb\xbf\xbf\xbf\xbf", "\\u3ffffff");         // largest 5-byte value
    compare("\xfd\xbf\xbf\xbf\xbf\xbf", "\\u7fffffff");    // largest 6-byte value
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment