Created
April 21, 2019 22:51
-
-
Save katahiromz/58a9782fc1caba8395e92e37e36148cc to your computer and use it in GitHub Desktop.
WinJIS.h --- Japanese encoding manipulation for Windows.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* WinJIS.h --- Japanese encoding manipulation for Windows. | |
* This is public domain software. | |
* Copyright (C) 2019 Katayama Hirofumi MZ <[email protected]>. | |
*/ | |
#ifndef WINJIS_H_ | |
#define WINJIS_H_ 2 /* Version 2 */ | |
#ifndef _INC_WINDOWS | |
#include <windows.h> | |
#endif | |
#ifdef __cplusplus | |
#include <cassert> | |
#else | |
#include <assert.h> | |
#endif | |
/*
 * JIS code (ISO-2022-JP).
 * Inclusive range that each byte of a two-byte JIS codepoint must fall in.
 */
#define JIS_BYTE_MIN    0x21    /* lowest JIS byte */
#define JIS_BYTE_MAX    0x7E    /* highest JIS byte */
/* Is it a JIS byte? */ | |
static __inline BOOL is_jis_byte(BYTE ch) | |
{ | |
return JIS_BYTE_MIN <= ch && ch <= JIS_BYTE_MAX; | |
} | |
/* Is it a fullwidth JIS codepoint? */ | |
static __inline BOOL is_jis_code(WORD w) | |
{ | |
return is_jis_byte(HIBYTE(w)) && is_jis_byte(LOBYTE(w)); | |
} | |
/* Get the row (KU) of a JIS codepoint. */ | |
static __inline BYTE row_from_jis_code(WORD jis) | |
{ | |
return (BYTE)(HIBYTE(jis) - JIS_BYTE_MIN + 1); | |
} | |
/* Get the column (TEN) of a JIS codepoint. */ | |
static __inline BYTE column_from_jis_code(WORD jis) | |
{ | |
return (BYTE)(LOBYTE(jis) - JIS_BYTE_MIN + 1); | |
} | |
/* Get the JIS codepoint from a position (KUTEN) */ | |
static __inline WORD jis_code_from_kuten(BYTE row, BYTE column) | |
{ | |
return MAKEWORD(column + 0x20, row + 0x20); | |
} | |
/*
 * Shift_JIS code (SJIS; codepage 932).
 * All ranges below are inclusive.
 */

/* Leading byte of a two-byte character: two disjoint ranges. */
#define SJIS_LEAD1_MIN          0x81
#define SJIS_LEAD1_MAX          0x9F
#define SJIS_LEAD2_MIN          0xE0
#define SJIS_LEAD2_MAX          0xEF

/* Trailing byte of a two-byte character: two ranges that leave out 0x7F. */
#define SJIS_TRAIL1_MIN         0x40
#define SJIS_TRAIL1_MAX         0x7E
#define SJIS_TRAIL2_MIN         0x80
#define SJIS_TRAIL2_MAX         0xFC

/* Single-byte halfwidth (hankaku) kana. */
#define SJIS_HANKAKU_KANA_MIN   0xA1
#define SJIS_HANKAKU_KANA_MAX   0xDF
/* Is it a SJIS leading byte? */ | |
static __inline BOOL is_sjis_lead(BYTE ch) | |
{ | |
return (SJIS_LEAD1_MIN <= ch && ch <= SJIS_LEAD1_MAX) || | |
(SJIS_LEAD2_MIN <= ch && ch <= SJIS_LEAD2_MAX); | |
} | |
/* Is it a SJIS trailing byte? */ | |
static __inline BOOL is_sjis_trail(BYTE ch) | |
{ | |
return (SJIS_TRAIL1_MIN <= ch && ch <= SJIS_TRAIL1_MAX) || | |
(SJIS_TRAIL2_MIN <= ch && ch <= SJIS_TRAIL2_MAX); | |
} | |
/* Is it a fullwidth SJIS codepoint? */ | |
static __inline BOOL is_sjis_code(WORD w) | |
{ | |
return is_sjis_lead(HIBYTE(w)) && is_sjis_trail(LOBYTE(w)); | |
} | |
/* Is it a SJIS halfwidth kana byte? */ | |
static __inline BOOL is_sjis_hankaku_kana(BYTE b) | |
{ | |
return SJIS_HANKAKU_KANA_MIN <= b && b <= SJIS_HANKAKU_KANA_MAX; | |
} | |
/*
 * EUC-JP.
 * Inclusive range that each byte of a two-byte EUC-JP codepoint must fall in.
 */
#define EUCJP_BYTE_MIN  0xA1    /* lowest EUC-JP byte */
#define EUCJP_BYTE_MAX  0xFE    /* highest EUC-JP byte */
/* Is it an EUCJP byte? */ | |
static __inline BOOL is_eucjp_byte(BYTE b) | |
{ | |
return EUCJP_BYTE_MIN <= b && b <= EUCJP_BYTE_MAX; | |
} | |
/* Is it a fullwidth EUCJP codepoint? */ | |
static __inline BOOL is_eucjp_code(WORD eucjp) | |
{ | |
return is_eucjp_byte(HIBYTE(eucjp)) && is_eucjp_byte(LOBYTE(eucjp)); | |
} | |
/* Get the EUCJP codepoint from a JIS codepoint. */ | |
static __inline BOOL eucjp_code_from_jis(WORD jis) | |
{ | |
BYTE c0 = HIBYTE(jis), c1 = LOBYTE(jis); | |
return MAKEWORD(c1 - 0x80, c0 - 0x80); | |
} | |
/* Get the JIS codepoint from a EUCJP codepoint. */ | |
static __inline BOOL jis_code_from_eucjp(WORD eucjp) | |
{ | |
BYTE c0 = HIBYTE(eucjp), c1 = LOBYTE(eucjp); | |
return MAKEWORD(c1 + 0x80, c0 + 0x80); | |
} | |
/* JIS <--> SJIS */ | |
/* Get the SJIS codepoint from a JIS codepoint. */ | |
static __inline WORD sjis_code_from_jis(WORD jis) | |
{ | |
BYTE c0 = HIBYTE(jis), c1 = LOBYTE(jis); | |
if (c0 & 0x01) | |
{ | |
c0 >>= 1; | |
if (c0 < 0x2F) | |
{ | |
c0 += 0x71; | |
} | |
else | |
{ | |
c0 -= 0x4F; | |
} | |
if (c1 > 0x5F) | |
{ | |
c1 += 0x20; | |
} | |
else | |
{ | |
c1 += 0x1F; | |
} | |
} | |
else | |
{ | |
c0 >>= 1; | |
if (c0 < 0x2F) | |
{ | |
c0 += 0x70; | |
} | |
else | |
{ | |
c0 -= 0x50; | |
} | |
c1 += 0x7E; | |
} | |
return MAKEWORD(c1, c0); | |
} | |
/* Get the JIS codepoint from a SJIS codepoint. */ | |
static __inline WORD jis_code_from_sjis(WORD sjis) | |
{ | |
BYTE c0 = HIBYTE(sjis), c1 = LOBYTE(sjis); | |
c0 <<= 1; | |
if (c1 < 0x9F) | |
{ | |
if (c0 < 0x3F) | |
{ | |
c0 += 0x1F; | |
} | |
else | |
{ | |
c0 -= 0x61; | |
} | |
if (c1 > 0x7E) | |
{ | |
c1 -= 0x20; | |
} | |
else | |
{ | |
c1 -= 0x1F; | |
} | |
} | |
else | |
{ | |
if (c0 < 0x3F) | |
{ | |
c0 += 0x20; | |
} | |
else | |
{ | |
c0 -= 0x60; | |
} | |
c1 -= 0x7E; | |
} | |
return MAKEWORD(c1, c0); | |
} | |
/* UTF-16 to SJIS */ | |
static __inline INT APIENTRY | |
WideCharToSJIS(DWORD dwFlags, LPCWSTR pszWide, INT cchWide, LPSTR pszSJIS, INT cchSJIS, | |
LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar) | |
{ | |
return WideCharToMultiByte(932, dwFlags, pszWide, cchWide, pszSJIS, cchSJIS, | |
lpDefaultChar, lpUsedDefaultChar); | |
} | |
/* SJIS to UTF-16 */ | |
static __inline INT APIENTRY | |
SJISToWideChar(DWORD dwFlags, LPCSTR pszSJIS, INT cchSJIS, LPWSTR pszWide, INT cchWide) | |
{ | |
return MultiByteToWideChar(932, dwFlags, pszSJIS, cchSJIS, pszWide, cchWide); | |
} | |
/*
 * JIS escape sequences.
 * Each macro expands to a string literal; the trailing comment spells the
 * same bytes out as ESC followed by printable characters.
 */

/* Sequences whose second byte is '$' (or that end in such a sequence). */
#define JIS_ESC_C6226_1978          "\x1B\x24\x40"              /* ESC $ @ */
#define JIS_ESC_X0208_1983          "\x1B\x24\x42"              /* ESC $ B */
#define JIS_ESC_X0208_1990          "\x1B\x26\x40\x1B\x24\x42"  /* ESC & @ ESC $ B */
#define JIS_ESC_X0212_1990          "\x1B\x24\x28\x44"          /* ESC $ ( D */
#define JIS_ESC_X0213_2000_PLANE1   "\x1B\x24\x28\x4F"          /* ESC $ ( O */
#define JIS_ESC_X0213_2004_PLANE1   "\x1B\x24\x28\x51"          /* ESC $ ( Q */
#define JIS_ESC_X0213_2000_PLANE2   "\x1B\x24\x28\x50"          /* ESC $ ( P */

/* Sequences whose second byte is '('. */
#define JIS_ESC_X0201_LATIN         "\x1B\x28\x4A"              /* ESC ( J */
#define JIS_ESC_X0201_KATAKANA      "\x1B\x28\x49"              /* ESC ( I */
#define JIS_ESC_ASCII_OR_INTL       "\x1B\x28\x42"              /* ESC ( B */
/* unittest */ | |
static __inline void winjis_unittest(void) | |
{ | |
int lo, hi; | |
WORD jis, sjis, eucjp; | |
BYTE ku, ten; | |
WCHAR utf16, szW[3]; | |
BYTE szA[3]; | |
for (hi = JIS_BYTE_MIN; hi <= JIS_BYTE_MAX; ++hi) | |
{ | |
for (lo = JIS_BYTE_MIN; lo <= JIS_BYTE_MAX; ++lo) | |
{ | |
jis = MAKEWORD(lo, hi); | |
ku = row_from_jis_code(jis); | |
ten = column_from_jis_code(jis); | |
sjis = sjis_code_from_jis(jis); | |
assert(is_jis_code(jis)); | |
assert(jis_code_from_kuten(ku, ten) == jis); | |
assert(jis_code_from_sjis(sjis) == jis); | |
} | |
} | |
for (hi = EUCJP_BYTE_MIN; hi <= EUCJP_BYTE_MAX; ++hi) | |
{ | |
for (lo = EUCJP_BYTE_MIN; lo <= EUCJP_BYTE_MAX; ++lo) | |
{ | |
eucjp = MAKEWORD(lo, hi); | |
jis = jis_code_from_eucjp(eucjp); | |
assert(is_eucjp_code(eucjp)); | |
assert(eucjp_code_from_jis(jis) == eucjp); | |
} | |
} | |
assert(!is_sjis_lead('A')); | |
assert(!is_sjis_lead('a')); | |
assert(!is_sjis_lead('0')); | |
assert(!is_sjis_lead(SJIS_HANKAKU_KANA_MIN)); | |
assert(!is_sjis_lead(SJIS_HANKAKU_KANA_MAX)); | |
assert(!is_sjis_lead(0x80)); | |
assert(is_sjis_lead(0x81)); | |
assert(is_sjis_lead(0x9F)); | |
assert(!is_sjis_lead(0xA0)); | |
assert(!is_sjis_lead(0xDF)); | |
assert(is_sjis_lead(0xE0)); | |
assert(is_sjis_lead(0xEF)); | |
assert(!is_sjis_lead(0xF0)); | |
assert(!is_sjis_trail(0x3F)); | |
assert(is_sjis_trail(0x40)); | |
assert(is_sjis_trail(0x7E)); | |
assert(!is_sjis_trail(0x7F)); | |
assert(!is_sjis_trail(0x7F)); | |
assert(is_sjis_trail(0x80)); | |
assert(is_sjis_trail(0xFC)); | |
assert(!is_sjis_trail(0xFD)); | |
/* FULLWIDTH HIRAGANA A */ | |
utf16 = 0x3042; | |
jis = 0x2422; | |
sjis = sjis_code_from_jis(jis); | |
eucjp = eucjp_code_from_jis(jis); | |
assert(row_from_jis_code(jis) == 4); | |
assert(column_from_jis_code(jis) == 2); | |
assert(sjis == 0x82A0); | |
assert(eucjp == 0xA4A2); | |
szA[0] = HIBYTE(sjis); | |
szA[1] = LOBYTE(sjis); | |
szA[2] = 0; | |
SJISToWideChar(0, (LPCSTR)szA, 2, szW, 3); | |
assert(szW[0] == utf16); | |
WideCharToSJIS(0, szW, 1, (LPSTR)szA, 2, NULL, NULL); | |
assert(szA[0] == HIBYTE(sjis)); | |
assert(szA[1] == LOBYTE(sjis)); | |
/* FULLWIDTH KATAKANA A */ | |
utf16 = 0x30A2; | |
jis = 0x2522; | |
sjis = sjis_code_from_jis(jis); | |
eucjp = eucjp_code_from_jis(jis); | |
assert(row_from_jis_code(jis) == 5); | |
assert(column_from_jis_code(jis) == 2); | |
assert(sjis == 0x8341); | |
assert(eucjp == 0xA5A2); | |
szA[0] = HIBYTE(sjis); | |
szA[1] = LOBYTE(sjis); | |
szA[2] = 0; | |
SJISToWideChar(0, (LPCSTR)szA, 2, szW, 3); | |
assert(szW[0] == utf16); | |
WideCharToSJIS(0, szW, 1, (LPSTR)szA, 2, NULL, NULL); | |
assert(szA[0] == HIBYTE(sjis)); | |
assert(szA[1] == LOBYTE(sjis)); | |
/* KANJI A */ | |
utf16 = 0x4E9C; | |
jis = 0x3021; | |
sjis = sjis_code_from_jis(jis); | |
eucjp = eucjp_code_from_jis(jis); | |
assert(row_from_jis_code(jis) == 16); | |
assert(column_from_jis_code(jis) == 1); | |
assert(sjis == 0x889F); | |
assert(eucjp == 0xB0A1); | |
szA[0] = HIBYTE(sjis); | |
szA[1] = LOBYTE(sjis); | |
szA[2] = 0; | |
SJISToWideChar(0, (LPCSTR)szA, 2, szW, 3); | |
assert(szW[0] == utf16); | |
WideCharToSJIS(0, szW, 1, (LPSTR)szA, 2, NULL, NULL); | |
assert(szA[0] == HIBYTE(sjis)); | |
assert(szA[1] == LOBYTE(sjis)); | |
} | |
#endif /* ndef WINJIS_H_ */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment