Skip to content

Instantly share code, notes, and snippets.

@h2oota
Created December 2, 2012 03:56
Show Gist options
  • Save h2oota/4186890 to your computer and use it in GitHub Desktop.
Save h2oota/4186890 to your computer and use it in GitHub Desktop.
Citrus iconvにARIB-B24デコーダーを組み込む
FreeBSDのiconv(Citrus iconv)にARIB-B24を組み込むための定義ファイルとパッチ
インストール方法:
0 FreeBSDのソースツリーを用意します。以降ソースは/usr/srcにあるものとして説明します
1 (A) esdb 定義ファイルを/usr/src/share/i18n/esdb/MISCにコピーします
2 (B) csmapper 定義ファイルを/usr/src/share/i18n/csmapper/JISにコピーします。
3 (C) パッチをあてます。
patch -d /usr/src/share/i18n < patch-i18n
4 ビルド、インストールします
make -C /usr/src/share/i18n all install
5 (D) パッチ(モジュール)をあてます
patch -d /usr/src/lib/libiconv_modules < patch-citrus_iso2022.c
6 ビルド、インストールします
make -C /usr/src/lib/libiconv_modules all install
7 テストします
perl -e 'for (qw/033 174 306 354 323 105 154 065 176 033 176 261/) {printf "%c", oct($_)}' | iconv -t utf-8 -f arib-b24-si
テレビ東京1 と変換されるはずです。
(A) esdb 定義ファイル:
ARIB-B24-SI.src
(B) csmapper 定義ファイル:
JISX0201-KANA@7%UCS.src
JISX0208-HIRAKANA%UCS.src
JISX0208-KATAKANA%UCS.src
UCS%JISX0201-KANA@7.src
UCS%JISX0208-HIRAKANA.src
UCS%JISX0208-KATAKANA.src
(C) パッチ:
patch-i18n
(D) パッチ(モジュール)
patch-citrus_iso2022.c
NAME "ARIB-B24-SI"
ENCODING "ISO2022"
# ARIB TR-B14
# G0 KANJI
# G1 EISU
# G2 HIRAKANA
# G3 KATAKANA
# VARIABLE "8BIT MAX3 INIT0=94$9 0=94B 0=94J 0=940 0=941 0=94I 0=94$B 0=94$9 0=94$: 0=94$; INIT1=94I 1=94B 1=94J 1=940 1=941 1=94I 1=94$B 1=94$9 1=94$: 1=94$; INIT2=940 2=94B 2=94J 2=940 2=941 2=94I 2=94$B 2=94$9 2=94$: 2=94$; INIT3=941 3=94B 3=94J 3=940 3=941 3=94I 3=94$B 3=94$9 3=94$: 3=94$; NOOLD LS0 LS1 LS2 LS2 LS1R LS2R LS3R SS2 SS3"
VARIABLE "8BIT MAX3 INIT0=94$9 INIT1=94J INIT2=940 INIT3=941 NOOLD LS0 LS1 LS2 LS3 LS1R LS2R LS3R SS2 SS3 ARIB-B24"
DEFCSID "ISO646-US" 0
#
DEFCSID "JISX0201:GR" 0x49000000
DEFCSID "ISO646-JP" 0x4A000000
DEFCSID "JISX0208-HIRAKANA" 0x30000000
DEFCSID "JISX0208-KATAKANA" 0x31000000
#
DEFCSID "JISX0208:1990" 0x42007F00
DEFCSID "JISX0213-1" 0x39007F00
DEFCSID "JISX0213-2" 0x3a007F00
DEFCSID "JISX0212" 0x3b007F00
INVALID 0x4200222E # GETA
TYPE ROWCOL
NAME JISX0201:GR/UCS
SRC_ZONE 0x21-0x5f
OOB_MODE ILSEQ
DST_ILSEQ 0xFFFE
DST_UNIT_BITS 16
BEGIN_MAP
0x21 - 0x5F = 0xFF61 -
END_MAP
# $FreeBSD$
# $NetBSD: JISX0201-KANA%UCS.src,v 1.3 2006/10/26 16:19:02 tnozaki Exp $
TYPE ROWCOL
NAME JISX0208-HIRAKANA/UCS
SRC_ZONE 0x21-0x7e
OOB_MODE ILSEQ
DST_ILSEQ 0xFFFE
DST_UNIT_BITS 16
BEGIN_MAP
0x21-0x73 = 0x3041 -
0x77-0x78 = 0x309D -
0x79 = 0x30fc
0x7a = 0x3002
0x7b - 0x7c = 0x300c -
0x7d = 0x3001
0x7e = 0x30fb
END_MAP
# $FreeBSD$
# $NetBSD: JISX0201-KANA%UCS.src,v 1.3 2006/10/26 16:19:02 tnozaki Exp $
TYPE ROWCOL
NAME JISX0208-KATAKANA/UCS
SRC_ZONE 0x21-0x7e
OOB_MODE ILSEQ
DST_ILSEQ 0xFFFE
DST_UNIT_BITS 16
BEGIN_MAP
0x21-0x76 = 0x30a1 -
0x77-0x78 = 0x30FD -
0x79 = 0x30fc
0x7a = 0x3002
0x7b - 0x7c = 0x300c -
0x7d = 0x3001
0x7e = 0x30fb
END_MAP
Index: ISO2022/citrus_iso2022.c
===================================================================
--- ISO2022/citrus_iso2022.c (revision 242935)
+++ ISO2022/citrus_iso2022.c (working copy)
@@ -121,6 +121,7 @@
#define F_SS3 0x1000 /*ESC O*/
#define F_SS2R 0x2000 /*8E*/
#define F_SS3R 0x4000 /*8F*/
+#define F_ARIB_B24 0x8000 /* violates the ISO 2022 encoding rules */
} _ISO2022EncodingInfo;
#define _CEI_TO_EI(_cei_) (&(_cei_)->ei)
@@ -340,6 +341,7 @@
{ "SS3", F_SS3 },
{ "SS2R", F_SS2R },
{ "SS3R", F_SS3R },
+ { "ARIB-B24", F_ARIB_B24},
{ NULL, 0 }
};
int i;
@@ -526,7 +528,7 @@
static int
seqmatch(const char * __restrict s, size_t n,
- const struct seqtable * __restrict sp)
+ const struct seqtable * __restrict sp, int flags)
{
const int *p;
@@ -538,7 +540,9 @@
goto terminate;
break;
case OECMA:
- if (*s && strchr("@AB", *s))
+ if (*s && (strchr("@AB", *s)
+ || ((flags & F_ARIB_B24)
+ && strchr("9:;", *s))))
break;
else
goto terminate;
@@ -571,7 +575,7 @@
}
static wchar_t
-_ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei __unused,
+_ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei,
char * __restrict string, size_t n, char ** __restrict result,
_ISO2022State * __restrict psenc)
{
@@ -609,7 +613,7 @@
/* look for a perfect match from escape sequences */
for (sp = &seqtable[0]; sp->len; sp++) {
- nmatch = seqmatch(string, n, sp);
+ nmatch = seqmatch(string, n, sp, ei->flags);
if (sp->len == nmatch && n >= (size_t)(sp->len))
break;
}
@@ -685,7 +689,7 @@
* head. otherwise, wait till full escape sequence comes.
*/
for (sp = &seqtable[0]; sp->len; sp++) {
- nmatch = seqmatch(string, n, sp);
+ nmatch = seqmatch(string, n, sp, ei->flags);
if (!nmatch)
continue;
Index: csmapper/JIS/Makefile
===================================================================
--- csmapper/JIS/Makefile (revision 242935)
+++ csmapper/JIS/Makefile (working copy)
@@ -12,6 +12,8 @@
UCS%[email protected] UCS%[email protected] UCS%JISX0208UDC.mps \
UCS%[email protected] UCS%[email protected] UCS%JISX0212UDC.mps \
UCS@BMP%JISX0213-1.mps UCS@SIP%JISX0213-1.mps UCS@BMP%JISX0213-2.mps \
- UCS@SIP%JISX0213-2.mps
-
+ UCS@SIP%JISX0213-2.mps \
+ JISX0208-HIRAKANA%UCS.mps JISX0208-KATAKANA%UCS.mps \
+ UCS%JISX0208-HIRAKANA.mps UCS%JISX0208-KATAKANA.mps \
+ JISX0201@GR%UCS.mps UCS%JISX0201@GR.mps
.include "../Makefile.part"
Index: csmapper/JIS/charset.pivot.JIS.src
===================================================================
--- csmapper/JIS/charset.pivot.JIS.src (revision 242935)
+++ csmapper/JIS/charset.pivot.JIS.src (working copy)
@@ -2,6 +2,7 @@
# $NetBSD: charset.pivot.JIS.src,v 1.4 2007/03/05 16:58:34 tnozaki Exp $
JISX0201-KANA UCS 1
+JISX0201:GR UCS 1
JISX0208:1978 UCS 1
JISX0208:1990 UCS 1
JISX0208:EUC UCS 1
@@ -11,7 +12,10 @@
JISX0213-1 UCS 1
JISX0213-2 UCS 1
JISX0212+0213-2 UCS 1
+JISX0208-HIRAKANA UCS 1
+JISX0208-KATAKANA UCS 1
UCS JISX0201-KANA 1
+UCS JISX0201:GR 1
UCS JISX0208:1978 1
UCS JISX0208:1990 1
UCS JISX0208:EUC 1
@@ -21,3 +25,6 @@
UCS JISX0213-1 1
UCS JISX0213-2 1
UCS JISX0212+0213-2 1
+UCS JISX0208-HIRAKANA 1
+UCS JISX0208-KATAKANA 1
+
Index: csmapper/JIS/mapper.dir.JIS.src
===================================================================
--- csmapper/JIS/mapper.dir.JIS.src (revision 242935)
+++ csmapper/JIS/mapper.dir.JIS.src (working copy)
@@ -2,6 +2,9 @@
# $NetBSD: mapper.dir.JIS.src,v 1.5 2007/03/05 16:58:34 tnozaki Exp $
JISX0201-KANA/UCS mapper_std JIS/JISX0201-KANA%UCS.mps
+JISX0201:GR/UCS mapper_std JIS/JISX0201@GR%UCS.mps
+JISX0208-HIRAKANA/UCS mapper_std JIS/JISX0208-HIRAKANA%UCS.mps
+JISX0208-KATAKANA/UCS mapper_std JIS/JISX0208-KATAKANA%UCS.mps
JISX0208:1978/UCS mapper_std JIS/JISX0208@1978%UCS.mps
JISX0208:1990/UCS mapper_std JIS/JISX0208@1990%UCS.mps
JISX0208:MS/UCS mapper_std JIS/JISX0208@MS%UCS.mps
@@ -22,6 +25,7 @@
JISX0213-2/UCS mapper_parallel JISX0213-2/UCS:BMP,JISX0213-2/UCS:SIP
JISX0212+0213-2/UCS mapper_parallel JISX0213-2/UCS,JISX0212/UCS
UCS/JISX0201-KANA mapper_std JIS/UCS%JISX0201-KANA.mps
+UCS/JISX0201:GR mapper_std JIS/UCS%JISX0201@GR.mps
UCS/JISX0208:1978 mapper_std JIS/UCS%JISX0208@1978.mps
UCS/JISX0208:1990 mapper_std JIS/UCS%JISX0208@1990.mps
UCS/JISX0208:MS mapper_std JIS/UCS%JISX0208@MS.mps
@@ -41,3 +45,5 @@
UCS:SIP/JISX0213-2 mapper_std JIS/UCS@SIP%JISX0213-2.mps
UCS/JISX0213-2 mapper_parallel UCS:BMP/JISX0213-2,UCS:SIP/JISX0213-2
UCS/JISX0212+0213-2 mapper_parallel UCS/JISX0213-2,UCS/JISX0212
+UCS/JISX0208-HIRAKANA mapper_std JIS/UCS%JISX0208-HIRAKANA.mps
+UCS/JISX0208-KATAKANA mapper_std JIS/UCS%JISX0208-KATAKANA.mps
Index: esdb/MISC/MISC.part
===================================================================
--- esdb/MISC/MISC.part (revision 242935)
+++ esdb/MISC/MISC.part (working copy)
@@ -1,6 +1,7 @@
# $FreeBSD$
# $NetBSD: esdb.dir.MISC.src,v 1.18 2007/04/01 18:52:31 tnozaki Exp $
+ARIB-B24-SI
ATARIST
C99
CTEXT
TYPE ROWCOL
NAME "UCS/JISX0201:GR"
SRC_ZONE 0x0000 - 0xFFEC
OOB_MODE INVALID
DST_INVALID 0xFF
DST_UNIT_BITS 32
BEGIN_MAP
0xff61-0xff9f = 0x21 -
END_MAP
# $FreeBSD$
# $NetBSD: JISX0201-KANA%UCS.src,v 1.3 2006/10/26 16:19:02 tnozaki Exp $
TYPE ROWCOL
NAME UCS/JISX0208-HIRAKANA
SRC_ZONE 0x0000 - 0xFFEC
OOB_MODE INVALID
DST_INVALID 0xFF
DST_UNIT_BITS 16
BEGIN_MAP
0x3041 - 0x3093 = 0x21 -
0x309D - 0x309E = 0x77 -
0x30fc = 0x79
0x3001 = 0x7d
0x3002 = 0x7a
0x300c - 0x300d = 0x7b -
0x30fb =0x7e
END_MAP
# $FreeBSD$
# $NetBSD: JISX0201-KANA%UCS.src,v 1.3 2006/10/26 16:19:02 tnozaki Exp $
TYPE ROWCOL
NAME UCS/JISX0208-KATAKANA
SRC_ZONE 0x0000 - 0xFFEC
OOB_MODE INVALID
DST_INVALID 0xFF
DST_UNIT_BITS 16
BEGIN_MAP
0x30a1 - 0x30f6 = 0x21 -
0x30fc = 0x79
0x30FD - 0x30FE = 0x77 -
0x3001 = 0x7d
0x3002 = 0x7a
0x300c - 0x300d = 0x7b -
0x30fb = 0x7e
END_MAP
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment