Last active
November 3, 2016 10:25
-
-
Save bbolli/c8ef50b897e6d6daaa72ff6cd4470ceb to your computer and use it in GitHub Desktop.
An efficient and macro-less way to convert wchar_t to UTF-8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Look Ma, no macros!
 */
/**
 * Append the UTF-8 encoding of @a ch to @a cont.
 *
 * @tparam Container  Any container usable with std::back_inserter whose
 *                    value type can be assigned from a char
 *                    (e.g. std::string, std::vector<char>).
 * @param cont  Container that receives between one and four bytes.
 * @param ch    The code point to encode.
 *
 * A single '?' is appended instead of an encoding when @a ch is not a valid
 * Unicode scalar value: a negative value (wchar_t is signed on some
 * platforms), a surrogate (U+D800..U+DFFF), or anything above U+10FFFF.
 */
template <class Container>
void to_utf8(Container& cont, wchar_t ch)
{
    // Classify on an unsigned copy: with a signed wchar_t a negative input
    // would otherwise pass the "<= 0x7F" test and be emitted as a raw byte.
    // After the conversion it becomes a huge value and hits the error branch.
    const char32_t cp = static_cast<char32_t>(ch);

    /*
     * Bit-twiddling lambdas for the lead byte and for continuation bytes.
     * Each takes six payload bits starting at `shift` and ORs in the marker;
     * for lead bytes the range checks below guarantee that the bits which
     * would collide with the marker are zero.
     *
     * These are small enough that the optimizer is expected to inline them
     * (the original version is shown at http://godbolt.org/g/ab5zsi).
     */
    auto lead = [cp](unsigned marker, int shift) -> char {
        return static_cast<char>(marker | ((cp >> shift) & 0x3Fu));
    };
    auto trail = [&lead](int shift) -> char {
        return lead(0x80u, shift);
    };

    auto out = std::back_inserter(cont);

    if (cp <= 0x7F) {
        *out++ = static_cast<char>(cp);
    } else if (cp <= 0x7FF) {
        *out++ = lead(0xC0u, 6);
        *out++ = trail(0);
    } else if (cp >= 0xD800 && cp <= 0xDFFF) {
        // Surrogates are not scalar values; encoding them is ill-formed UTF-8.
        *out++ = '?';
    } else if (cp <= 0xFFFF) {
        *out++ = lead(0xE0u, 12);
        *out++ = trail(6);
        *out++ = trail(0);
    } else if (cp <= 0x10FFFF) {
        *out++ = lead(0xF0u, 18);
        *out++ = trail(12);
        *out++ = trail(6);
        *out++ = trail(0);
    } else {
        *out++ = '?';  // error handling is not the point here ;-)
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment