Created
January 26, 2018 04:13
-
-
Save Bak-Jin-Hyeong/322a9ca6399cb477ebd4ab0e8c9d03c0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef APPEND_UNESCAPED_JSON_STRING__HPP__ | |
#define APPEND_UNESCAPED_JSON_STRING__HPP__ | |
//
// usage:
//
// const char json[] = u8"유티엪후-8! XYZ, \\uAC00, \\uD600\\uc5c9, QVW \\u0X 유티엪후-8!, 써로게이트 \\uD83D\\uDCA9 페어!";
// const auto json_length = strlen(json);
// std::string s;
// append_unescaped_json_string(json, json_length, [&s](auto c) {
// s.push_back(c);
// });
//
//
// Encodes a single Unicode code point as UTF-8 and hands each resulting byte
// to `append`, in order.
//
// codepoint: the value to encode.
// append:    callable invoked as append(char), once per output byte.
//
// Returns the number of bytes emitted (1 to 4). Returns 0, emitting nothing,
// when `codepoint` has no well-formed UTF-8 encoding: values above U+10FFFF
// and the UTF-16 surrogate range U+D800..U+DFFF.
template<typename AppendFn>
int append_codepoint_as_utf8(unsigned int codepoint, AppendFn append)
{
    if (codepoint <= 0x7F)
    {
        // 0xxxxxxx
        append(static_cast<char>(codepoint));
        return 1;
    }
    else if (codepoint <= 0x7FF)
    {
        // 110xxxxx 10xxxxxx
        append(static_cast<char>(0xC0 + ((codepoint >> 6) & 0x1F)));
        append(static_cast<char>(0x80 + (codepoint & 0x3F)));
        return 2;
    }
    else if (codepoint <= 0xFFFF)
    {
        // Surrogates are UTF-16 code units, not characters; encoding them
        // would produce ill-formed UTF-8, so reject them like any other
        // unencodable value.
        if (codepoint >= 0xD800 && codepoint <= 0xDFFF)
        {
            return 0;
        }
        // 1110xxxx 10xxxxxx 10xxxxxx
        append(static_cast<char>(0xE0 + ((codepoint >> 12) & 0xF)));
        append(static_cast<char>(0x80 + ((codepoint >> 6) & 0x3F)));
        append(static_cast<char>(0x80 + (codepoint & 0x3F)));
        return 3;
    }
    else if (codepoint <= 0x10FFFF)
    {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        append(static_cast<char>(0xF0 + ((codepoint >> 18) & 0x7)));
        append(static_cast<char>(0x80 + ((codepoint >> 12) & 0x3F)));
        append(static_cast<char>(0x80 + ((codepoint >> 6) & 0x3F)));
        append(static_cast<char>(0x80 + (codepoint & 0x3F)));
        return 4;
    }
    else
    {
        // Beyond the Unicode code space.
        return 0;
    }
}
// Decodes the backslash escapes of a JSON string body and feeds the result,
// one char at a time, to `append`.
//
// p:      start of the escaped text (the content between the quotes); it
//         does not have to be NUL-terminated.
// length: number of chars readable at `p`; nothing past p[length - 1] is read.
// append: callable invoked as append(char) for every output byte.
//
// Handling of each kind of input:
//  - Ordinary chars are passed through unchanged.
//  - The two-char escapes for quote, slash and backslash yield that char;
//    the b, f, n, r, t escapes yield the matching control char.
//  - "\uXXXX" (exactly four hex digits, either case) yields the code point
//    as UTF-8. A high surrogate immediately followed by a low surrogate is
//    combined into a single code point above U+FFFF.
//  - Anything malformed is passed through verbatim instead of being dropped:
//    a backslash at the very end, an unknown escape letter, a "\u" with fewer
//    than four hex digits, and unpaired surrogate escapes.
template<typename AppendFn>
void append_unescaped_json_string(const char* p, size_t length, AppendFn append)
{
    // Remembers a "\uD8xx".."\uDBxx" escape until we know whether a low
    // surrogate follows. The original six chars are kept so that they can be
    // re-emitted untouched if the pair never completes.
    struct SurrogateState
    {
        char high_surrogate_rep[8]{};
        int high_surrogate = 0;

        // Emits the pending high surrogate's source text (if any) and clears
        // the state. Takes the sink by reference so that a stateful functor
        // receives the output itself rather than a throwaway copy.
        void rollback(AppendFn& append)
        {
            if (high_surrogate)
            {
                for (int i = 0; i < 6; ++i)
                {
                    append(high_surrogate_rep[i]);
                }
            }
            high_surrogate = 0;
        }
    } surrogate_state;

    for (size_t position = 0; position < length;)
    {
        const char current = p[position];
        if (current != '\\' || position + 1 == length)
        {
            // Plain char, or a lone backslash as the final char.
            surrogate_state.rollback(append);
            append(current);
            ++position;
        }
        else if (p[position + 1] == 'u')
        {
            const size_t escape_start = position;
            position += 2;

            // Consume up to four hex digits, stopping early at the first
            // non-hex char or at the end of the input.
            unsigned int codepoint = 0;
            int hex_length = 0;
            for (; hex_length < 4 && position < length; ++hex_length, ++position)
            {
                const char c = p[position];
                unsigned int digit = 0;
                if (c >= '0' && c <= '9')
                {
                    digit = static_cast<unsigned int>(c - '0');
                }
                else if (c >= 'a' && c <= 'f')
                {
                    digit = static_cast<unsigned int>(c - 'a' + 10);
                }
                else if (c >= 'A' && c <= 'F')
                {
                    digit = static_cast<unsigned int>(c - 'A' + 10);
                }
                else
                {
                    break;
                }
                codepoint = (codepoint << 4) + digit;
            }

            if (hex_length != 4)
            {
                // Malformed escape: echo exactly what was consumed. The char
                // that ended the digits (if there is one) is left for the
                // next iteration, so an escape that starts right there is
                // still decoded and nothing beyond `length` is ever read.
                surrogate_state.rollback(append);
                for (size_t k = escape_start; k < position; ++k)
                {
                    append(p[k]);
                }
            }
            else if (codepoint >= 0xD800 && codepoint <= 0xDBFF)
            {
                // High surrogate: flush any earlier unpaired one, then hold
                // this one back until the next escape is seen.
                surrogate_state.rollback(append);
                surrogate_state.high_surrogate = static_cast<int>(codepoint);
                for (int k = 0; k < 6; ++k)
                {
                    surrogate_state.high_surrogate_rep[k] = p[escape_start + k];
                }
            }
            else if (codepoint >= 0xDC00 && codepoint <= 0xDFFF)
            {
                if (surrogate_state.high_surrogate)
                {
                    // Completed pair: combine both halves into one code point.
                    const unsigned int result_codepoint =
                        ((surrogate_state.high_surrogate & 0x3FF) << 10) +
                        (codepoint & 0x3FF) + 0x10000;
                    append_codepoint_as_utf8(result_codepoint, append);
                    surrogate_state.high_surrogate = 0;
                }
                else
                {
                    // Low surrogate with no partner: keep the escape text.
                    for (size_t k = escape_start; k < position; ++k)
                    {
                        append(p[k]);
                    }
                }
            }
            else
            {
                surrogate_state.rollback(append);
                append_codepoint_as_utf8(codepoint, append);
            }
        }
        else
        {
            // Two-char escape. position + 1 < length is guaranteed by the
            // first branch above.
            surrogate_state.rollback(append);
            const char next = p[position + 1];
            switch (next)
            {
            case '\"': case '/': case '\\':
                // The escaped char stands for itself.
                append(next);
                break;
            case 'b':
                append('\b');
                break;
            case 'f':
                append('\f');
                break;
            case 'r':
                append('\r');
                break;
            case 'n':
                append('\n');
                break;
            case 't':
                append('\t');
                break;
            default:
                // Unknown escape: keep it as written.
                append('\\');
                append(next);
                break;
            }
            position += 2;
        }
    }

    // A high surrogate that was the last thing in the input never found its
    // partner; emit its escape text rather than silently dropping it.
    surrogate_state.rollback(append);
}
#endif // #ifndef APPEND_UNESCAPED_JSON_STRING__HPP__ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment