Skip to content

Instantly share code, notes, and snippets.

@bbolli
Last active November 3, 2016 10:25
Show Gist options
  • Save bbolli/c8ef50b897e6d6daaa72ff6cd4470ceb to your computer and use it in GitHub Desktop.
Save bbolli/c8ef50b897e6d6daaa72ff6cd4470ceb to your computer and use it in GitHub Desktop.
An efficient and macro-less way to convert wchar_t to UTF-8
/*
* Look Ma, no macros!
*/
/**
 * Append the UTF-8 encoding of @a ch to @a cont.
 *
 * @tparam Container  A container accepted by std::back_inserter whose
 *                    elements can be assigned from @c char (for example
 *                    std::string or std::vector<char>).
 * @param cont  Container the encoded bytes are appended to.
 * @param ch    Code point to encode.
 *
 * Between one and four bytes are appended. A value that is not a Unicode
 * scalar value (a negative @c wchar_t, a surrogate U+D800..U+DFFF, or
 * anything above U+10FFFF) has no UTF-8 encoding; a single '?' is appended
 * in its place.
 */
template <class Container>
void to_utf8(Container& cont, wchar_t ch)
{
    // wchar_t may be a signed type. Work on an unsigned copy so that a
    // negative value ends up in the error branch instead of satisfying the
    // "<= 0x7F" test and being appended as a raw, non-UTF-8 byte.
    const char32_t cp = static_cast<char32_t>(ch);

    /*
     * Bit-twiddling lambdas for the first and any continuation bytes.
     *
     * Each one ORs a marker prefix onto six payload bits taken from cp. For
     * a leading byte the range checks below guarantee that the shifted value
     * already fits into the (fewer) payload bits of that byte.
     *
     * These are small enough that the optimizer is expected to inline them;
     * see http://godbolt.org/g/ab5zsi for an example.
     */
    auto lead = [cp](unsigned prefix, unsigned shift) -> char {
        return static_cast<char>(prefix | ((cp >> shift) & 0x3F));
    };
    auto tail = [&lead](unsigned shift) -> char {
        return lead(0x80, shift);
    };

    auto out = std::back_inserter(cont);
    if (cp <= 0x7F) {
        // ASCII: encoded as itself.
        *out++ = static_cast<char>(cp);
    } else if (cp <= 0x7FF) {
        *out++ = lead(0xC0, 6);
        *out++ = tail(0);
    } else if (cp >= 0xD800 && cp <= 0xDFFF) {
        // Surrogates are not scalar values and must not appear in UTF-8.
        *out++ = '?';
    } else if (cp <= 0xFFFF) {
        *out++ = lead(0xE0, 12);
        *out++ = tail(6);
        *out++ = tail(0);
    } else if (cp <= 0x10FFFF) {
        *out++ = lead(0xF0, 18);
        *out++ = tail(12);
        *out++ = tail(6);
        *out++ = tail(0);
    } else {
        // Out of the Unicode range (this also catches negative inputs).
        *out++ = '?'; // error handling is not the point here ;-)
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment