Skip to content

Instantly share code, notes, and snippets.

@alanduan
Created September 14, 2016 07:40
Show Gist options
  • Save alanduan/aed65c88bc15709c4b98fb492753271e to your computer and use it in GitHub Desktop.
Save alanduan/aed65c88bc15709c4b98fb492753271e to your computer and use it in GitHub Desktop.
Messing around with character encodings.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <locale>
#include <stdexcept>
#include <string>
namespace {

// Returns the locale selected by the environment. std::locale("") throws
// std::runtime_error when the environment names a locale that is not
// installed, so fall back to the classic "C" locale instead of aborting.
std::locale user_locale()
{
    try {
        return std::locale("");
    } catch (const std::runtime_error&) {
        return std::locale::classic();
    }
}

// Decodes `size` bytes of UTF-16 text (big- or little-endian, no BOM) into a
// wide string and upper-cases every element with `loc`.
//
// The code units are assembled byte by byte, so the result does not depend on
// the host's endianness or on the alignment of `bytes`. A trailing odd byte is
// ignored. Where wchar_t is wider than 16 bits, a valid surrogate pair is
// merged into a single code point; where wchar_t is 16 bits the code units are
// stored one-to-one (the wide string is itself UTF-16 there).
std::wstring utf16_bytes_to_upper(const char* bytes, std::size_t size,
                                  bool big_endian, const std::locale& loc)
{
    const std::size_t unit_count = size / 2;

    const auto unit_at = [&](std::size_t index) -> char32_t {
        const auto first = static_cast<unsigned char>(bytes[2 * index]);
        const auto second = static_cast<unsigned char>(bytes[2 * index + 1]);
        const unsigned hi = big_endian ? first : second;
        const unsigned lo = big_endian ? second : first;
        return static_cast<char32_t>(hi << 8 | lo);
    };

    std::wstring out;
    out.reserve(unit_count);
    for (std::size_t i = 0; i < unit_count; ++i) {
        char32_t code_point = unit_at(i);
        const bool is_high_surrogate = code_point >= 0xD800 && code_point <= 0xDBFF;
        if (sizeof(wchar_t) > 2 && is_high_surrogate && i + 1 < unit_count) {
            const char32_t next = unit_at(i + 1);
            if (next >= 0xDC00 && next <= 0xDFFF) {
                code_point = 0x10000 + ((code_point - 0xD800) << 10) + (next - 0xDC00);
                ++i; // the low surrogate has been consumed
            }
        }
        out.push_back(std::toupper(static_cast<wchar_t>(code_point), loc));
    }
    return out;
}

} // namespace

// Prints, in hex, each element of a wide literal next to its upper-cased
// value, then decodes a UTF-16BE byte sequence into a std::wstring,
// upper-cases it and prints one element per line.
int main()
{
    // Built once: constructing std::locale("") per character is wasteful.
    const std::locale loc = user_locale();

    const wchar_t data[] = L"\u00b1\u03b1\U00024b62"; // { 0xb1 : 0x3b1 : 0xd852 : 0xdf62 : 0 } on windows, while { 0xb1, 0x3b1, 0x24b62, 0 } on linux
    //char16_t data[] = u"\u00b1\u03b1\U00024b62"; // works! actually saved as { 0xb1, 0x3b1, 0xd852, 0xdf62, 0 }
    //char32_t data[] = U"\u00b1\u03b1\U00024b62"; // works! as { 0xb1, 0x3b1, 0x24b62, 0 }

    std::cout << std::hex << std::showbase;
    // The loop deliberately includes the terminating 0 of the array.
    // Values are cast to an integer type explicitly: streaming wchar_t into a
    // narrow stream only worked through integral promotion and is a deleted
    // overload since C++20.
    for (const auto& c : data)
        std::cout << static_cast<unsigned long>(c) << " : "
                  << static_cast<unsigned long>(std::toupper(c, loc)) << '\t';
    std::cout << '\n';

    const char utf16le[] = {'\xb1', '\x00', '\xb1', '\x03', '\x52', '\xd8', '\x62', '\xdf'};
    const char utf16be[] = {'\x00', '\xb1', '\x03', '\xb1', '\xd8', '\x52', '\xdf', '\x62'};
    const char ascii[] = {'\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', '\x68'};
    // Alternative inputs kept for experimentation; only utf16be is decoded.
    static_cast<void>(utf16le);
    static_cast<void>(ascii);

    const std::wstring s = utf16_bytes_to_upper(utf16be, sizeof(utf16be), true, loc);
    for (const auto& c : s)
        std::cout << static_cast<unsigned long>(c) << '\n';
    std::cout << '\n';
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment