Skip to content

Instantly share code, notes, and snippets.

@Bak-Jin-Hyeong
Created January 26, 2018 04:13
Show Gist options
  • Save Bak-Jin-Hyeong/322a9ca6399cb477ebd4ab0e8c9d03c0 to your computer and use it in GitHub Desktop.
Save Bak-Jin-Hyeong/322a9ca6399cb477ebd4ab0e8c9d03c0 to your computer and use it in GitHub Desktop.
#ifndef APPEND_UNESCAPED_JSON_STRING__HPP__
#define APPEND_UNESCAPED_JSON_STRING__HPP__
//
// usage:
//
// const char json[] = u8"유티엪후-8! XYZ, \\uAC00, \\uD600\\uc5c9, QVW \\u0X 유티엪후-8!, 써로게이트 \\uD83D\\uDCA9 페어!";
// const auto json_length = strlen(json);
// std::string s;
// append_unescaped_json_string(json, json_length, [&s](auto c) {
// s.push_back(c);
// });
//
//
// Encodes `codepoint` as UTF-8 and passes the resulting bytes to `append`,
// one `char` per call, most significant byte first.
//
// Returns the number of bytes emitted (1 to 4). A value above 0x10FFFF emits
// nothing and returns 0. Values in the surrogate range 0xD800-0xDFFF are not
// rejected here; they are encoded as three bytes like any other value up to
// 0xFFFF.
template<typename AppendFn>
int append_codepoint_as_utf8(unsigned int codepoint, AppendFn append)
{
    // Emits one continuation byte (10xxxxxx) holding the six bits of
    // `codepoint` that start at bit `shift`.
    const auto emit_continuation = [&append, codepoint](unsigned int shift)
    {
        append(static_cast<char>(0x80 | ((codepoint >> shift) & 0x3F)));
    };

    if (codepoint > 0x10FFFF)
    {
        return 0;
    }

    if (codepoint < 0x80)
    {
        // Single byte: 0xxxxxxx.
        append(static_cast<char>(codepoint));
        return 1;
    }

    if (codepoint < 0x800)
    {
        // Two bytes: 110xxxxx 10xxxxxx.
        append(static_cast<char>(0xC0 | (codepoint >> 6)));
        emit_continuation(0);
        return 2;
    }

    if (codepoint < 0x10000)
    {
        // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
        append(static_cast<char>(0xE0 | (codepoint >> 12)));
        emit_continuation(6);
        emit_continuation(0);
        return 3;
    }

    // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
    append(static_cast<char>(0xF0 | (codepoint >> 18)));
    emit_continuation(12);
    emit_continuation(6);
    emit_continuation(0);
    return 4;
}
// Decodes the JSON string escape sequences found in the `length` bytes
// starting at `p` and passes the decoded bytes to `append`, one `char` per
// call. `p` does not have to be null-terminated; no byte at or beyond
// `p + length` is read.
//
// Handling of each kind of input:
//  - Bytes that are not part of an escape are forwarded unchanged.
//  - `\"`, `\/` and `\\` produce the escaped character itself; `\b`, `\f`,
//    `\n`, `\r` and `\t` produce the matching control character.
//  - `\uXXXX` (exactly four hex digits, either case) is emitted as UTF-8.
//    A high surrogate (D800-DBFF) immediately followed by a low surrogate
//    (DC00-DFFF) escape is combined into one code point.
//  - Anything malformed is kept as written rather than dropped: an unknown
//    escape such as `\q`, a backslash that is the last byte, a `\u` with
//    fewer than four hex digits, and a high or low surrogate without its
//    partner are all emitted as their original text.
template<typename AppendFn>
void append_unescaped_json_string(const char* p, size_t length, AppendFn append)
{
    // Remembers a `\uD8xx`-style high surrogate until we know whether a low
    // surrogate follows it.
    struct SurrogateState
    {
        char high_surrogate_rep[8]{};     // original six bytes of the escape
        unsigned int high_surrogate = 0;  // 0 means nothing is pending

        // Emits a pending high surrogate as its original escape text (it
        // turned out to be unpaired) and clears the pending state.
        // Takes the functor by reference so it is not copied on every call.
        void rollback(AppendFn& append)
        {
            if (high_surrogate)
            {
                for (int i = 0; i < 6; ++i)
                {
                    append(high_surrogate_rep[i]);
                }
            }
            high_surrogate = 0;
        }
    } surrogate_state;

    // Returns the value of a hex digit, or -1 if `c` is not one.
    const auto hex_value = [](char c) -> int
    {
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
        return -1;
    };

    for (size_t position = 0; position < length;)
    {
        const char current = p[position];
        if (current != '\\' || position + 1 == length)
        {
            // Ordinary byte, or a lone backslash at the very end of input.
            surrogate_state.rollback(append);
            append(current);
            ++position;
        }
        else if (p[position + 1] == 'u')
        {
            const size_t escape_start = position;
            position += 2;

            unsigned int codepoint = 0;
            int hex_length = 0;
            for (; hex_length < 4 && position < length; ++hex_length, ++position)
            {
                const int digit = hex_value(p[position]);
                if (digit < 0)
                {
                    break;
                }
                codepoint = (codepoint << 4) + static_cast<unsigned int>(digit);
            }

            if (hex_length != 4)
            {
                // Incomplete escape: emit exactly what was consumed (`\u`
                // plus any digits). The byte that ended the escape, if there
                // is one, is left for the main loop so that it is never read
                // past the end of input and, when it is a backslash, the
                // escape it starts still gets decoded.
                surrogate_state.rollback(append);
                for (size_t k = escape_start; k < position; ++k)
                {
                    append(p[k]);
                }
            }
            else if (codepoint >= 0xD800 && codepoint <= 0xDBFF)
            {
                // High surrogate: hold it back until the next token is seen.
                // A previously pending one is unpaired, so flush it first.
                surrogate_state.rollback(append);
                surrogate_state.high_surrogate = codepoint;
                for (int k = 0; k < 6; ++k)
                {
                    surrogate_state.high_surrogate_rep[k] = p[escape_start + k];
                }
            }
            else if (codepoint >= 0xDC00 && codepoint <= 0xDFFF)
            {
                if (surrogate_state.high_surrogate)
                {
                    // Valid pair: combine into a supplementary code point.
                    const unsigned int result_codepoint =
                        ((surrogate_state.high_surrogate & 0x3FF) << 10) +
                        (codepoint & 0x3FF) + 0x10000;
                    append_codepoint_as_utf8(result_codepoint, append);
                    surrogate_state.high_surrogate = 0;
                }
                else
                {
                    // Low surrogate with no preceding high surrogate: keep
                    // the escape text as written.
                    for (size_t k = escape_start; k < position; ++k)
                    {
                        append(p[k]);
                    }
                }
            }
            else
            {
                surrogate_state.rollback(append);
                append_codepoint_as_utf8(codepoint, append);
            }
        }
        else
        {
            // Two-character escape; the first branch guarantees that
            // p[position + 1] is inside the input.
            surrogate_state.rollback(append);
            const char next = p[position + 1];
            switch (next)
            {
            case '\"': case '/': case '\\':
                // The escaped character stands for itself.
                append(next);
                break;
            case 'b':
                append('\b');
                break;
            case 'f':
                append('\f');
                break;
            case 'r':
                append('\r');
                break;
            case 'n':
                append('\n');
                break;
            case 't':
                append('\t');
                break;
            default:
                // Unknown escape: pass it through untouched.
                append('\\');
                append(next);
                break;
            }
            position += 2;
        }
    }

    // A high surrogate that was the last token of the input has no partner;
    // emit its escape text instead of silently dropping it.
    surrogate_state.rollback(append);
}
#endif // #ifndef APPEND_UNESCAPED_JSON_STRING__HPP__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment