Skip to content

Instantly share code, notes, and snippets.

@katahiromz
Created April 21, 2019 22:51
Show Gist options
  • Save katahiromz/58a9782fc1caba8395e92e37e36148cc to your computer and use it in GitHub Desktop.
Save katahiromz/58a9782fc1caba8395e92e37e36148cc to your computer and use it in GitHub Desktop.
WinJIS.h --- Japanese encoding manipulation for Windows.
/* WinJIS.h --- Japanese encoding manipulation for Windows.
* This is public domain software.
* Copyright (C) 2019 Katayama Hirofumi MZ <[email protected]>.
*/
#ifndef WINJIS_H_
#define WINJIS_H_ 2 /* Version 2 */
#ifndef _INC_WINDOWS
#include <windows.h>
#endif
#ifdef __cplusplus
#include <cassert>
#else
#include <assert.h>
#endif
/* JIS code (ISO-2022-JP) */
/* Inclusive lower and upper bounds of a single JIS byte; is_jis_byte()
 * accepts exactly the values in [JIS_BYTE_MIN, JIS_BYTE_MAX]. */
#define JIS_BYTE_MIN 0x21
#define JIS_BYTE_MAX 0x7E
/* Test whether a byte lies inside the inclusive range
 * [JIS_BYTE_MIN, JIS_BYTE_MAX].
 * Returns 1 when it does and 0 when it does not. */
static __inline BOOL is_jis_byte(BYTE ch)
{
    /* In range == not (below the minimum or above the maximum). */
    return !(ch < JIS_BYTE_MIN || ch > JIS_BYTE_MAX);
}
/* Test whether a 16-bit value is a fullwidth JIS codepoint, i.e. both its
 * high byte and its low byte satisfy is_jis_byte().
 * Returns 1 when both bytes qualify and 0 otherwise. */
static __inline BOOL is_jis_code(WORD w)
{
    /* The high byte is examined first; the low byte only if that passes. */
    if (!is_jis_byte(HIBYTE(w)))
    {
        return 0;
    }
    return is_jis_byte(LOBYTE(w)) != 0;
}
/* Get the row (KU) of a JIS codepoint.
 * The row is derived from the high byte and is 1-based: a high byte equal
 * to JIS_BYTE_MIN yields row 1. The input is not validated. */
static __inline BYTE row_from_jis_code(WORD jis)
{
    const int zero_based = HIBYTE(jis) - JIS_BYTE_MIN;
    return (BYTE)(zero_based + 1);
}
/* Get the column (TEN) of a JIS codepoint.
 * The column is derived from the low byte and is 1-based: a low byte equal
 * to JIS_BYTE_MIN yields column 1. The input is not validated. */
static __inline BYTE column_from_jis_code(WORD jis)
{
    const int zero_based = LOBYTE(jis) - JIS_BYTE_MIN;
    return (BYTE)(1 + zero_based);
}
/* Build a JIS codepoint from a position (KUTEN).
 * row    -- KU; becomes the high byte after adding 0x20.
 * column -- TEN; becomes the low byte after adding 0x20.
 * The arguments are not range-checked. */
static __inline WORD jis_code_from_kuten(BYTE row, BYTE column)
{
    const int hi_byte = row + 0x20;
    const int lo_byte = column + 0x20;
    return MAKEWORD(lo_byte, hi_byte);
}
/* Shift_JIS code (SJIS; codepage 932) */
/* All bounds below are inclusive. */
/* First and second ranges of leading bytes accepted by is_sjis_lead(). */
#define SJIS_LEAD1_MIN 0x81
#define SJIS_LEAD1_MAX 0x9F
#define SJIS_LEAD2_MIN 0xE0
#define SJIS_LEAD2_MAX 0xEF
/* First and second ranges of trailing bytes accepted by is_sjis_trail();
 * the value 0x7F falls between the two ranges and is not accepted. */
#define SJIS_TRAIL1_MIN 0x40
#define SJIS_TRAIL1_MAX 0x7E
#define SJIS_TRAIL2_MIN 0x80
#define SJIS_TRAIL2_MAX 0xFC
/* Range of single bytes accepted by is_sjis_hankaku_kana(). */
#define SJIS_HANKAKU_KANA_MIN 0xA1
#define SJIS_HANKAKU_KANA_MAX 0xDF
/* Test whether a byte is a SJIS leading byte, i.e. lies in
 * [SJIS_LEAD1_MIN, SJIS_LEAD1_MAX] or [SJIS_LEAD2_MIN, SJIS_LEAD2_MAX].
 * Returns 1 when it does and 0 otherwise. */
static __inline BOOL is_sjis_lead(BYTE ch)
{
    /* First leading-byte range. */
    if (SJIS_LEAD1_MIN <= ch && ch <= SJIS_LEAD1_MAX)
    {
        return 1;
    }
    /* Second leading-byte range. */
    return SJIS_LEAD2_MIN <= ch && ch <= SJIS_LEAD2_MAX;
}
/* Test whether a byte is a SJIS trailing byte, i.e. lies in
 * [SJIS_TRAIL1_MIN, SJIS_TRAIL1_MAX] or [SJIS_TRAIL2_MIN, SJIS_TRAIL2_MAX].
 * Returns 1 when it does and 0 otherwise. */
static __inline BOOL is_sjis_trail(BYTE ch)
{
    /* A byte is rejected only when it is outside both ranges. */
    return !((ch < SJIS_TRAIL1_MIN || ch > SJIS_TRAIL1_MAX) &&
             (ch < SJIS_TRAIL2_MIN || ch > SJIS_TRAIL2_MAX));
}
/* Test whether a 16-bit value is a fullwidth SJIS codepoint: its high byte
 * must satisfy is_sjis_lead() and its low byte is_sjis_trail().
 * Returns 1 when both hold and 0 otherwise. */
static __inline BOOL is_sjis_code(WORD w)
{
    /* The trailing byte is only examined when the leading byte is valid. */
    return is_sjis_lead(HIBYTE(w)) ? (is_sjis_trail(LOBYTE(w)) != 0) : 0;
}
/* Test whether a byte is a SJIS halfwidth (hankaku) kana byte, i.e. lies in
 * [SJIS_HANKAKU_KANA_MIN, SJIS_HANKAKU_KANA_MAX].
 * Returns 1 when it does and 0 otherwise. */
static __inline BOOL is_sjis_hankaku_kana(BYTE b)
{
    if (b < SJIS_HANKAKU_KANA_MIN)
    {
        return 0;
    }
    return b <= SJIS_HANKAKU_KANA_MAX;
}
/* EUC-JP */
/* Inclusive lower and upper bounds of a single EUCJP byte; is_eucjp_byte()
 * accepts exactly the values in [EUCJP_BYTE_MIN, EUCJP_BYTE_MAX]. */
#define EUCJP_BYTE_MIN 0xA1
#define EUCJP_BYTE_MAX 0xFE
/* Test whether a byte lies inside the inclusive range
 * [EUCJP_BYTE_MIN, EUCJP_BYTE_MAX].
 * Returns 1 when it does and 0 when it does not. */
static __inline BOOL is_eucjp_byte(BYTE b)
{
    /* In range == not (below the minimum or above the maximum). */
    return !(b < EUCJP_BYTE_MIN || b > EUCJP_BYTE_MAX);
}
/* Test whether a 16-bit value is a fullwidth EUCJP codepoint, i.e. both its
 * high byte and its low byte satisfy is_eucjp_byte().
 * Returns 1 when both bytes qualify and 0 otherwise. */
static __inline BOOL is_eucjp_code(WORD eucjp)
{
    /* The high byte is examined first; the low byte only if that passes. */
    if (!is_eucjp_byte(HIBYTE(eucjp)))
    {
        return 0;
    }
    return is_eucjp_byte(LOBYTE(eucjp)) != 0;
}
/* Get the EUCJP codepoint from a JIS codepoint.
 * Each byte of the JIS code is shifted up by 0x80, so a byte in
 * [JIS_BYTE_MIN, JIS_BYTE_MAX] lands in [EUCJP_BYTE_MIN, EUCJP_BYTE_MAX].
 * jis     -- the JIS codepoint; it is not validated.
 * returns -- the 16-bit EUCJP codepoint (high byte first). */
static __inline WORD eucjp_code_from_jis(WORD jis)
{
    BYTE c0 = HIBYTE(jis), c1 = LOBYTE(jis);
    /* MAKEWORD keeps only the low 8 bits of each argument, so any carry out
     * of a byte is discarded. */
    return MAKEWORD(c1 + 0x80, c0 + 0x80);
}
/* Get the JIS codepoint from an EUCJP codepoint.
 * Each byte of the EUCJP code is shifted down by 0x80, so a byte in
 * [EUCJP_BYTE_MIN, EUCJP_BYTE_MAX] lands in [JIS_BYTE_MIN, JIS_BYTE_MAX].
 * eucjp   -- the EUCJP codepoint; it is not validated.
 * returns -- the 16-bit JIS codepoint (high byte first). */
static __inline WORD jis_code_from_eucjp(WORD eucjp)
{
    BYTE c0 = HIBYTE(eucjp), c1 = LOBYTE(eucjp);
    /* MAKEWORD keeps only the low 8 bits of each argument, so any borrow
     * out of a byte is discarded. */
    return MAKEWORD(c1 - 0x80, c0 - 0x80);
}
/* JIS <--> SJIS */
/* Get the SJIS codepoint from a JIS codepoint.
 * Two consecutive JIS rows share one SJIS leading byte: an odd JIS high
 * byte maps to the lower half of the trailing-byte space (0x40..0x9E,
 * skipping 0x7F) and an even one to the upper half (0x9F..0xFC).
 * JIS high bytes 0x21..0x5E map to leading bytes 0x81..0x9F and
 * 0x5F..0x7E map to 0xE0..0xEF.
 * jis     -- the JIS codepoint; it is not validated.
 * returns -- the 16-bit SJIS codepoint (leading byte in the high byte). */
static __inline WORD sjis_code_from_jis(WORD jis)
{
    BYTE c0 = HIBYTE(jis), c1 = LOBYTE(jis);
    if (c0 & 0x01)
    {
        /* Odd high byte: halved values 0x10..0x2E belong to the first
         * leading-byte range, 0x2F..0x3E to the second. */
        c0 >>= 1;
        if (c0 < 0x2F)
        {
            c0 += 0x71;
        }
        else
        {
            c0 -= 0x4F; /* BYTE arithmetic wraps, e.g. 0x2F -> 0xE0 */
        }
        /* Low bytes above 0x5F are pushed one further so that the
         * trailing byte never becomes 0x7F. */
        if (c1 > 0x5F)
        {
            c1 += 0x20;
        }
        else
        {
            c1 += 0x1F;
        }
    }
    else
    {
        /* Even high byte: halved values 0x11..0x2F belong to the first
         * leading-byte range, 0x30..0x3F to the second. The boundary is
         * one higher than in the odd branch; testing against 0x2F here
         * would turn high byte 0x5E into leading byte 0xDF instead of
         * 0x9F. */
        c0 >>= 1;
        if (c0 < 0x30)
        {
            c0 += 0x70;
        }
        else
        {
            c0 -= 0x50; /* BYTE arithmetic wraps, e.g. 0x30 -> 0xE0 */
        }
        c1 += 0x7E;
    }
    return MAKEWORD(c1, c0);
}
/* Get the JIS codepoint from a SJIS codepoint.
 * A trailing byte below 0x9F selects the odd JIS row of the pair encoded by
 * the leading byte; a trailing byte of 0x9F or above selects the even row.
 * sjis    -- the SJIS codepoint; it is not validated.
 * returns -- the 16-bit JIS codepoint (high byte first). */
static __inline WORD jis_code_from_sjis(WORD sjis)
{
    BYTE lead = HIBYTE(sjis);
    BYTE trail = LOBYTE(sjis);
    /* Decided from the untouched trailing byte, before any adjustment. */
    const int odd_row = trail < 0x9F;

    /* Doubling in 8 bits drops the top bit of the leading byte. */
    lead = (BYTE)(lead << 1);
    if (lead < 0x3F)
    {
        lead = (BYTE)(lead + (odd_row ? 0x1F : 0x20));
    }
    else
    {
        lead = (BYTE)(lead - (odd_row ? 0x61 : 0x60));
    }

    if (!odd_row)
    {
        trail = (BYTE)(trail - 0x7E);
    }
    else if (trail > 0x7E)
    {
        /* Trailing bytes past the 0x7F gap need one more step back. */
        trail = (BYTE)(trail - 0x20);
    }
    else
    {
        trail = (BYTE)(trail - 0x1F);
    }
    return MAKEWORD(trail, lead);
}
/* UTF-16 to SJIS.
 * Forwards every argument unchanged to WideCharToMultiByte() with code page
 * 932 and returns whatever that call returns. */
static __inline INT APIENTRY
WideCharToSJIS(DWORD dwFlags, LPCWSTR pszWide, INT cchWide, LPSTR pszSJIS, INT cchSJIS,
               LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar)
{
    enum { sjis_codepage = 932 };
    INT result;

    result = WideCharToMultiByte(sjis_codepage, dwFlags,
                                 pszWide, cchWide,
                                 pszSJIS, cchSJIS,
                                 lpDefaultChar, lpUsedDefaultChar);
    return result;
}
/* SJIS to UTF-16.
 * Forwards every argument unchanged to MultiByteToWideChar() with code page
 * 932 and returns whatever that call returns. */
static __inline INT APIENTRY
SJISToWideChar(DWORD dwFlags, LPCSTR pszSJIS, INT cchSJIS, LPWSTR pszWide, INT cchWide)
{
    enum { sjis_codepage = 932 };
    INT result;

    result = MultiByteToWideChar(sjis_codepage, dwFlags,
                                 pszSJIS, cchSJIS,
                                 pszWide, cchWide);
    return result;
}
/* JIS escape sequences */
/* Each macro expands to a string literal holding the raw bytes of the
 * escape sequence spelled out in its trailing comment (0x1B is ESC).
 * JIS_ESC_X0208_1990 is two sequences back to back: ESC & @ followed by
 * ESC $ B. */
#define JIS_ESC_C6226_1978 "\x1B\x24\x40" /* ESC $ @ */
#define JIS_ESC_X0208_1983 "\x1B\x24\x42" /* ESC $ B */
#define JIS_ESC_X0208_1990 "\x1B\x26\x40\x1B\x24\x42" /* ESC & @ ESC $ B */
#define JIS_ESC_X0212_1990 "\x1B\x24\x28\x44" /* ESC $ ( D */
#define JIS_ESC_X0213_2000_PLANE1 "\x1B\x24\x28\x4F" /* ESC $ ( O */
#define JIS_ESC_X0213_2004_PLANE1 "\x1B\x24\x28\x51" /* ESC $ ( Q */
#define JIS_ESC_X0213_2000_PLANE2 "\x1B\x24\x28\x50" /* ESC $ ( P */
#define JIS_ESC_X0201_LATIN "\x1B\x28\x4A" /* ESC ( J */
#define JIS_ESC_X0201_KATAKANA "\x1B\x28\x49" /* ESC ( I */
#define JIS_ESC_ASCII_OR_INTL "\x1B\x28\x42" /* ESC ( B */
/* unittest helper: check one character against its expected KUTEN position
 * and its expected SJIS / EUCJP codes, then round-trip it through the
 * code page 932 conversion wrappers.
 * utf16 -- expected UTF-16 code unit of the character.
 * jis   -- JIS codepoint of the character.
 * ku    -- expected row of jis.
 * ten   -- expected column of jis.
 * sjis  -- expected SJIS codepoint.
 * eucjp -- expected EUCJP codepoint. */
static __inline void
winjis_unittest_char(WCHAR utf16, WORD jis, BYTE ku, BYTE ten, WORD sjis, WORD eucjp)
{
    WCHAR szW[3] = { 0 };
    BYTE szA[3];
    INT ret;

    assert(row_from_jis_code(jis) == ku);
    assert(column_from_jis_code(jis) == ten);
    assert(sjis_code_from_jis(jis) == sjis);
    assert(eucjp_code_from_jis(jis) == eucjp);

    /* SJIS bytes -> UTF-16: exactly one UTF-16 code unit is expected. */
    szA[0] = HIBYTE(sjis);
    szA[1] = LOBYTE(sjis);
    szA[2] = 0;
    ret = SJISToWideChar(0, (LPCSTR)szA, 2, szW, 3);
    assert(ret == 1);
    assert(szW[0] == utf16);

    /* Clear the byte buffer first so that the comparison below cannot
     * succeed on stale data if the reverse conversion writes nothing. */
    szA[0] = 0;
    szA[1] = 0;
    ret = WideCharToSJIS(0, szW, 1, (LPSTR)szA, 2, NULL, NULL);
    assert(ret == 2);
    assert(szA[0] == HIBYTE(sjis));
    assert(szA[1] == LOBYTE(sjis));

    /* Keep builds with NDEBUG free of unused-variable warnings. */
    (void)utf16; (void)jis; (void)ku; (void)ten; (void)eucjp; (void)ret;
}
/* unittest: exercises the predicates and conversions of this header with
 * assert(). It has no effect beyond the conversion calls when NDEBUG is
 * defined. */
static __inline void winjis_unittest(void)
{
    int lo, hi;
    WORD jis, sjis, eucjp;
    BYTE ku, ten;

    /* Every JIS codepoint must survive KUTEN and SJIS round trips.
     * NOTE(review): this only checks JIS -> SJIS -> JIS; it does not check
     * that the intermediate value is itself a valid SJIS code. */
    for (hi = JIS_BYTE_MIN; hi <= JIS_BYTE_MAX; ++hi)
    {
        for (lo = JIS_BYTE_MIN; lo <= JIS_BYTE_MAX; ++lo)
        {
            jis = MAKEWORD(lo, hi);
            ku = row_from_jis_code(jis);
            ten = column_from_jis_code(jis);
            sjis = sjis_code_from_jis(jis);
            assert(is_jis_code(jis));
            assert(jis_code_from_kuten(ku, ten) == jis);
            assert(jis_code_from_sjis(sjis) == jis);
        }
    }

    /* Every EUCJP codepoint must survive the EUCJP -> JIS -> EUCJP trip. */
    for (hi = EUCJP_BYTE_MIN; hi <= EUCJP_BYTE_MAX; ++hi)
    {
        for (lo = EUCJP_BYTE_MIN; lo <= EUCJP_BYTE_MAX; ++lo)
        {
            eucjp = MAKEWORD(lo, hi);
            jis = jis_code_from_eucjp(eucjp);
            assert(is_eucjp_code(eucjp));
            assert(eucjp_code_from_jis(jis) == eucjp);
        }
    }

    /* Leading-byte classification, including both range boundaries. */
    assert(!is_sjis_lead('A'));
    assert(!is_sjis_lead('a'));
    assert(!is_sjis_lead('0'));
    assert(!is_sjis_lead(SJIS_HANKAKU_KANA_MIN));
    assert(!is_sjis_lead(SJIS_HANKAKU_KANA_MAX));
    assert(!is_sjis_lead(0x80));
    assert(is_sjis_lead(0x81));
    assert(is_sjis_lead(0x9F));
    assert(!is_sjis_lead(0xA0));
    assert(!is_sjis_lead(0xDF));
    assert(is_sjis_lead(0xE0));
    assert(is_sjis_lead(0xEF));
    assert(!is_sjis_lead(0xF0));

    /* Trailing-byte classification, including the 0x7F gap. */
    assert(!is_sjis_trail(0x3F));
    assert(is_sjis_trail(0x40));
    assert(is_sjis_trail(0x7E));
    assert(!is_sjis_trail(0x7F));
    assert(is_sjis_trail(0x80));
    assert(is_sjis_trail(0xFC));
    assert(!is_sjis_trail(0xFD));

    /* Halfwidth kana classification at both boundaries. */
    assert(!is_sjis_hankaku_kana(0xA0));
    assert(is_sjis_hankaku_kana(SJIS_HANKAKU_KANA_MIN));
    assert(is_sjis_hankaku_kana(SJIS_HANKAKU_KANA_MAX));
    assert(!is_sjis_hankaku_kana(0xE0));

    /* FULLWIDTH HIRAGANA A */
    winjis_unittest_char(0x3042, 0x2422, 4, 2, 0x82A0, 0xA4A2);
    /* FULLWIDTH KATAKANA A */
    winjis_unittest_char(0x30A2, 0x2522, 5, 2, 0x8341, 0xA5A2);
    /* KANJI A */
    winjis_unittest_char(0x4E9C, 0x3021, 16, 1, 0x889F, 0xB0A1);

    /* Keep builds with NDEBUG free of unused-variable warnings. */
    (void)jis; (void)sjis; (void)eucjp; (void)ku; (void)ten;
}
#endif /* ndef WINJIS_H_ */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment