Skip to content

Instantly share code, notes, and snippets.

@wrongbranch
Created November 27, 2021 06:51
Show Gist options
  • Save wrongbranch/6161a27d49749ccc7ba20d5b8ea4e220 to your computer and use it in GitHub Desktop.
Save wrongbranch/6161a27d49749ccc7ba20d5b8ea4e220 to your computer and use it in GitHub Desktop.
testUTF8.cpp
/*
testUTF8.cpp
https://onihusube.hatenablog.com/entry/2020/04/03/211442
*/
// UNICODE / _UNICODE are defined in the .cbp project file, so they stay commented out here:
// #define UNICODE
// #define _UNICODE
#include <wchar.h>
#include <windows.h>
#include <testUTF8.h>
#include <exception>
#include <stdexcept>
#include <iomanip>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <string>
#include <memory>
#if 0 // def __GNUC__
#include <string_view>
using namespace std::string_view_literals;
#endif
using namespace std;
// Sample text (CJK, a non-BMP ideograph and several emoji) used by main().
// NOTE(review): "<200d>" is present here as six literal characters; it may be
// a ZERO WIDTH JOINER that an editor rendered as text - confirm intent.
// Wide-character (wchar_t) form of the sample text.
#define TXT_WS L"(森鷗外𠮟る 🤔 😢 🙇<200d>♂️ 🎉 😰 😊 😭 😥 終端)"
// Narrow form of the same text; main() passes it to MultiByteToWideChar with
// CP_UTF8, so it assumes the compiler stores this literal as UTF-8 - TODO confirm.
#define TXT_U8S "(森鷗外𠮟る 🤔 😢 🙇<200d>♂️ 🎉 😰 😊 😭 😥 終端)"
// u8 raw-string string_view form; only referenced inside the `#if 0` section of main().
#define TXT_U8RSV u8R"(森鷗外𠮟る 🤔 😢 🙇<200d>♂️ 🎉 😰 😊 😭 😥 終端)"sv
// Output file names: raw wchar_t dump, UTF-8 dump and CP_ACP (ANSI code page) dump.
#define FN_WS "out_wc16.txt"
#define FN_U8 "out_utf8.txt"
#define FN_SJ "out_cp932.txt"
int main(int ac, char **av)
{
cout.imbue(locale(""));
wcout.imbue(locale(""));
cout << "Hello world!" << endl;
#if 0 // def __GNUC__
auto u8s = TXT_U8RSV;
cout << reinterpret_cast<const char *>(u8s.data()) << endl;
#endif
const wchar_t *ws = TXT_WS;
const char *u8s = TXT_U8S;
int wlen, len;
wlen = lstrlen(ws);
wcout << wlen << ws << endl; // 43
if(cout.fail()) cout.clear();
if(wcout.fail()) wcout.clear();
len = WideCharToMultiByte(CP_UTF8, 0, ws, wlen, NULL, 0, NULL, NULL);
cout << len << endl; // 79 (CP_UTF8)
string u8scnv(len, '\0');
len = WideCharToMultiByte(CP_UTF8, 0, ws, wlen,
&u8scnv[0], u8scnv.length(), NULL, NULL);
cout << len << u8scnv << endl; // 79
if(cout.fail()) cout.clear();
if(wcout.fail()) wcout.clear();
ofstream ofsu8(FN_U8, ios::out | ios::binary);
ofsu8.write(&u8scnv[0], len); // OK
len = WideCharToMultiByte(CP_ACP, 0, ws, wlen, NULL, 0, NULL, NULL);
cout << len << endl; // 50 (CP_ACP)
string sjscnv(len, '\0');
len = WideCharToMultiByte(CP_ACP, 0, ws, wlen,
&sjscnv[0], sjscnv.length(), NULL, NULL);
cout << len << sjscnv << endl; // 50
if(cout.fail()) cout.clear();
if(wcout.fail()) wcout.clear();
ofstream ofssj(FN_SJ, ios::out | ios::binary);
ofssj.write(&sjscnv[0], len); // skipped non ASCII (same as console)
len = strlen(u8s);
cout << len << u8s << endl; // 79
wlen = MultiByteToWideChar(CP_UTF8, 0, u8s, len, NULL, 0);
cout << wlen << endl; // 43
wstring wscnv(wlen, L'\0');
wlen = MultiByteToWideChar(CP_UTF8, 0, u8s, len, &wscnv[0], wscnv.length());
wcout << wlen << wscnv << endl; // 43 wcout need wcout.imbue(locale(""));
if(cout.fail()) cout.clear();
if(wcout.fail()) wcout.clear();
ofstream ofsws(FN_WS, ios::out | ios::binary);
ofsws.write((char *)(&wscnv[0]), wlen * sizeof(wchar_t)); // OK (LE)
cout << endl;
cout << TXT_ABC << endl;
try{
throw runtime_error("xyz");
}catch(runtime_error &e){
cerr << "runtime: [" << e.what() << "]" << endl;
}catch(exception &e){
cerr << "exception: [" << e.what() << "]" << endl;
}catch(...){
cerr << "exception: [UNKNOWN]" << endl;
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment