Skip to content

Instantly share code, notes, and snippets.

@Bigcheese
Created June 3, 2014 23:07
Show Gist options
  • Save Bigcheese/9dbea9fd0a63951e37c4 to your computer and use it in GitHub Desktop.
Save Bigcheese/9dbea9fd0a63951e37c4 to your computer and use it in GitHub Desktop.
#include <cstdint>
#include <locale>
#include <string>
/// Vocabulary types for a sketched Unicode text-handling interface.
namespace unicode {

/// Unicode encoding forms that a conversion can be asked to produce.
enum class encoding_form {
  utf_8,
  utf_16,
  utf_32,
};

// Levels of abstraction the interface distinguishes:
//   code unit
//   code point
//   text element

/// Strongly typed code unit with an `unsigned char` underlying type.
/// NOTE(review): `char8_t` is a keyword from C++20 onwards, so this
/// declaration only compiles in earlier language modes.
/// `lulz` looks like a placeholder enumerator (value 0) -- TODO confirm.
enum class char8_t : unsigned char { lulz };

/// Strongly typed 32-bit code point. The enum itself enforces no range.
/// `adena` looks like a placeholder enumerator (value 0) -- TODO confirm.
enum class codepoint : std::uint32_t { adena };

/// Strongly typed 32-bit scalar value, kept distinct from `codepoint` so the
/// two cannot be mixed implicitly. The enum itself enforces no range.
enum class scalar_value : std::uint32_t { adena };

}  // namespace unicode
using namespace unicode;
/// Usage sketch for the conversion / case-mapping interface.
///
/// Reads the first command-line argument as text and walks through the
/// intended API calls. The helpers used here (`converting_iterator`,
/// `make_conversion_range`, `abort_handler`, `to_upper`,
/// `make_to_upper_range`, `make_codepoint_range`) are not declared in this
/// file.
///
/// @param argc Number of command-line arguments, including the program name.
/// @param argv Argument vector; `argv[1]` supplies the input text.
/// @return 1 when no input argument was supplied; otherwise falls off the end
///         of `main`, which is equivalent to returning 0.
int main(int argc, const char **argv) {
  // Without an argument argv[1] is a null pointer, and constructing a
  // std::string from a null pointer is undefined behaviour.
  if (argc < 2) {
    return 1;
  }

  std::string s(argv[1]);
  // NOTE(review): locale names are platform-specific and std::locale throws
  // std::runtime_error for a name it does not recognise -- confirm "en-US" is
  // valid on the target platform.
  std::locale l("en-US");

  // Iterator form of the conversion; the temporaries are discarded, this only
  // shows the spelling.
  converting_iterator<system, encoding_form::utf_8>(std::begin(s));
  converting_iterator<system, encoding_form::utf_8>(std::end(s));

  // char is assumed to be the system encoding. char8_t is explicitly UTF-8.
  std::u16string s16(make_conversion_range<encoding_form::utf_16>(s));
  std::u8string s8(make_conversion_range<encoding_form::utf_8, abort_handler>(s));

  // In place. What argument type...
  to_upper(s8, l);
  make_to_upper_range(s8, l);
  // No to upper iterator as it's not a 1 to 1 mapping.

  // Error. system encoding can only be used for encoding change.
  // auto cp = make_codepoint_range(s);
  auto cp = make_codepoint_range(s8);
  make_to_upper_range(cp, l);  // Also takes code points.
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment