Skip to content

Instantly share code, notes, and snippets.

@sebastiancarlos
Last active November 2, 2023 12:43
Show Gist options
  • Save sebastiancarlos/177f4a1ad4cc9627f20755837ea16171 to your computer and use it in GitHub Desktop.
Save sebastiancarlos/177f4a1ad4cc9627f20755837ea16171 to your computer and use it in GitHub Desktop.
Tree structure of glibc character sets and their aliases.
# Tree structure of glibc character sets and their aliases.
#
# - Generated with: print-glibc-charset-aliases.bash
# - For glibc 2.38
#
# - Format:
# TARGET
# \_ ALIAS
# \_ ALIAS
# ...
ANSI_X3.110
\_ ANSI_X3.110-1983
\_ CSA_T500-1983
\_ CSA_T500
\_ CSISO99NAPLPS
\_ ISO-IR-99
\_ NAPLPS
ANSI_X3.4-1968
\_ ANSI_X3.4-1986
\_ ANSI_X3.4
\_ ASCII
\_ CP367
\_ CSASCII
\_ IBM367
\_ ISO_646.IRV:1991
\_ ISO646-US
\_ ISO-IR-6
\_ OSF00010020
\_ US
\_ US-ASCII
ARMSCII-8
\_ ARMSCII8
ASMO_449
\_ ARABIC7
\_ CSISO89ASMO449
\_ ISO_9036
\_ ISO-IR-89
BIG5
\_ BIG-5
\_ BIG-FIVE
\_ BIGFIVE
\_ CN-BIG5
\_ CP950
BIG5HKSCS
\_ BIG5-HKSCS
BS_4730
\_ CSISO4UNITEDKINGDOM
\_ GB
\_ ISO646-GB
\_ ISO-IR-4
\_ UK
CP10007
\_ MS-MAC-CYRILLIC
\_ MSMACCYRILLIC
CP1125
\_ IBM848
\_ RUSCII
CP1250
\_ MS-EE
\_ WINDOWS-1250
CP1251
\_ MS-CYRL
\_ WINDOWS-1251
CP1252
\_ MS-ANSI
\_ WINDOWS-1252
CP1253
\_ MS-GREEK
\_ WINDOWS-1253
CP1254
\_ MS-TURK
\_ WINDOWS-1254
CP1255
\_ MS-HEBR
\_ WINDOWS-1255
CP1256
\_ MS-ARAB
\_ WINDOWS-1256
CP1257
\_ WINBALTRIM
\_ WINDOWS-1257
CP1258
\_ WINDOWS-1258
CP775
\_ CSPC775BALTIC
\_ IBM775
CP932
\_ CSWINDOWS31J
\_ MS932
\_ SJIS-OPEN
\_ SJIS-WIN
\_ WINDOWS-31J
CSA_Z243.4-1985-1
\_ CA
\_ CSA7-1
\_ CSA_Z243.419851
\_ CSISO121CANADIAN1
\_ ISO646-CA
\_ ISO-IR-121
CSA_Z243.4-1985-2
\_ CSA7-2
\_ CSA_Z243.419852
\_ CSISO122CANADIAN2
\_ ISO646-CA2
\_ ISO-IR-122
CSN_369103
\_ CSISO139CSN369103
\_ ISO-IR-139
CWI
\_ CP-HU
\_ CWI-2
DEC-MCS
\_ CSDECMCS
\_ DEC
\_ DECMCS
DIN_66003
\_ CSISO21GERMAN
\_ DE
\_ ISO646-DE
\_ ISO-IR-21
DS_2089
\_ CSISO646DANISH
\_ DK
\_ DS2089
\_ ISO646-DK
EBCDIC-AT-DE
\_ CSEBCDICATDE
\_ EBCDICATDE
EBCDIC-AT-DE-A
\_ CSEBCDICATDEA
\_ EBCDICATDEA
EBCDIC-CA-FR
\_ CSEBCDICCAFR
\_ EBCDICCAFR
EBCDIC-DK-NO
\_ CSEBCDICDKNO
\_ EBCDICDKNO
EBCDIC-DK-NO-A
\_ CSEBCDICDKNOA
\_ EBCDICDKNOA
EBCDIC-ES
\_ CSEBCDICES
\_ EBCDICES
EBCDIC-ES-A
\_ CSEBCDICESA
\_ EBCDICESA
EBCDIC-ES-S
\_ CSEBCDICESS
\_ EBCDICESS
EBCDIC-FI-SE
\_ CSEBCDICFISE
\_ EBCDICFISE
EBCDIC-FI-SE-A
\_ CSEBCDICFISEA
\_ EBCDICFISEA
EBCDIC-FR
\_ CSEBCDICFR
\_ EBCDICFR
EBCDIC-IS-FRISS
\_ EBCDICISFRISS
EBCDIC-IT
\_ CSEBCDICIT
\_ EBCDICIT
EBCDIC-PT
\_ CSEBCDICPT
\_ EBCDICPT
EBCDIC-UK
\_ CSEBCDICUK
\_ EBCDICUK
EBCDIC-US
\_ CSEBCDICUS
\_ EBCDICUS
ECMA-CYRILLIC
\_ CSISO111ECMACYRILLIC
\_ ECMACYRILLIC
\_ ISO-IR-111
ES
\_ CSISO17SPANISH
\_ ISO646-ES
\_ ISO-IR-17
ES2
\_ CSISO85SPANISH2
\_ ISO646-ES2
\_ ISO-IR-85
EUC-CN
\_ CN-GB
\_ csGB2312
\_ EUCCN
\_ GB2312
EUC-JP
\_ CSEUCPKDFMTJAPANESE
\_ EUCJP
\_ OSF00030010
\_ UJIS
EUC-JP-MS
\_ EUCJP-MS
\_ EUCJP-OPEN
\_ EUCJP-WIN
EUC-KR
\_ CSEUCKR
\_ EUCKR
\_ OSF0004000a
EUC-TW
\_ EUCTW
\_ OSF0005000a
GB_1988-80
\_ CN
\_ CSISO58GB1988
\_ GB_198880
\_ ISO646-CN
\_ ISO-IR-57
GBK
\_ CP936
\_ GB13000
\_ MS936
\_ WINDOWS-936
GOST_19768-74
\_ CSISO153GOST1976874
\_ GOST_1976874
\_ GOST_19768
\_ ISO-IR-153
\_ ST_SEV_358-88
GREEK7
\_ CSISO88GREEK7
\_ ISO-IR-88
GREEK7-OLD
\_ CSISO18GREEK7OLD
\_ GREEK7OLD
\_ ISO-IR-18
GREEK-CCITT
\_ CSISO150
\_ CSISO150GREEKCCITT
\_ GREEKCCITT
\_ ISO-IR-150
HP-GREEK8
\_ HPGREEK8
\_ OSF10010004
HP-ROMAN8
\_ CSHPROMAN8
\_ HPROMAN8
\_ OSF10010001
\_ R8
\_ ROMAN8
HP-ROMAN9
\_ HPROMAN9
\_ R9
\_ ROMAN9
HP-THAI8
\_ HPTHAI8
\_ THAI8
HP-TURKISH8
\_ HPTURKISH8
\_ OSF10010006
\_ TURKISH8
IBM037
\_ CP037
\_ CP1070
\_ CP282
\_ CSIBM037
\_ EBCDIC-CP-CA
\_ EBCDIC-CP-NL
\_ EBCDIC-CP-US
\_ EBCDIC-CP-WT
\_ OSF10020025
IBM038
\_ CP038
\_ CSIBM038
\_ EBCDIC-INT
IBM1004
\_ CP1004
\_ OS2LATIN1
IBM1008
\_ CP1008
\_ CSIBM1008
\_ IBM-1008
IBM1025
\_ CP1025
\_ CSIBM1025
\_ IBM-1025
IBM1026
\_ 1026
\_ CP1026
\_ CSIBM1026
\_ OSF10020402
IBM1046
\_ 1046
\_ CP1046
\_ IBM-1046
IBM1047
\_ 1047
\_ CP1047
\_ IBM-1047
\_ OSF10020417
IBM1097
\_ CP1097
\_ CSIBM1097
\_ IBM-1097
IBM1112
\_ CP1112
\_ CSIBM1112
\_ IBM-1112
IBM1122
\_ CP1122
\_ CSIBM1122
\_ IBM-1122
IBM1123
\_ CP1123
\_ CSIBM1123
\_ IBM-1123
IBM1124
\_ CP1124
\_ CSIBM1124
\_ IBM-1124
IBM1129
\_ CP1129
\_ CSIBM1129
\_ IBM-1129
IBM1130
\_ CP1130
\_ CSIBM1130
\_ IBM-1130
IBM1132
\_ CP1132
\_ CSIBM1132
\_ IBM-1132
IBM1133
\_ CP1133
\_ CSIBM1133
\_ IBM-1133
IBM1137
\_ CP1137
\_ CSIBM1137
\_ IBM-1137
IBM1140
\_ CP1140
\_ CSIBM1140
\_ IBM-1140
IBM1141
\_ CP1141
\_ CSIBM1141
\_ IBM-1141
IBM1142
\_ CP1142
\_ CSIBM1142
\_ IBM-1142
IBM1143
\_ CP1143
\_ CSIBM1143
\_ IBM-1143
IBM1144
\_ CP1144
\_ CSIBM1144
\_ IBM-1144
IBM1145
\_ CP1145
\_ CSIBM1145
\_ IBM-1145
IBM1146
\_ CP1146
\_ CSIBM1146
\_ IBM-1146
IBM1147
\_ CP1147
\_ CSIBM1147
\_ IBM-1147
IBM1148
\_ CP1148
\_ CSIBM1148
\_ IBM-1148
IBM1149
\_ CP1149
\_ CSIBM1149
\_ IBM-1149
IBM1153
\_ CP1153
\_ CSIBM1153
\_ IBM-1153
IBM1154
\_ CP1154
\_ CSIBM1154
\_ IBM-1154
IBM1155
\_ CP1155
\_ CSIBM1155
\_ IBM-1155
IBM1156
\_ CP1156
\_ CSIBM1156
\_ IBM-1156
IBM1157
\_ CP1157
\_ CSIBM1157
\_ IBM-1157
IBM1158
\_ CP1158
\_ CSIBM1158
\_ IBM-1158
IBM1160
\_ CP1160
\_ CSIBM1160
\_ IBM-1160
IBM1161
\_ CP1161
\_ CSIBM1161
\_ IBM-1161
IBM1162
\_ CP1162
\_ CSIBM11621162
\_ IBM-1162
IBM1163
\_ CP1163
\_ CSIBM1163
\_ IBM-1163
IBM1164
\_ CP1164
\_ CSIBM1164
\_ IBM-1164
IBM1166
\_ CP1166
\_ CSIBM1166
\_ IBM-1166
IBM1167
\_ CP1167
\_ CSIBM1167
\_ IBM-1167
IBM12712
\_ CP12712
\_ CSIBM12712
\_ IBM-12712
IBM1364
\_ CP1364
\_ CSIBM1364
\_ IBM-1364
IBM1371
\_ CP1371
\_ CSIBM1371
\_ IBM-1371
IBM1388
\_ CP1388
\_ CSIBM1388
\_ IBM-1388
IBM1390
\_ CP1390
\_ CSIBM1390
\_ IBM-1390
IBM1399
\_ CP1399
\_ CSIBM1399
\_ IBM-1399
IBM16804
\_ CP16804
\_ CSIBM16804
\_ IBM-16804
IBM256
\_ EBCDIC-INT1
IBM273
\_ CP273
\_ CSIBM273
\_ OSF10020111
IBM274
\_ CP274
\_ CSIBM274
\_ EBCDIC-BE
IBM275
\_ CP275
\_ CSIBM275
\_ EBCDIC-BR
IBM277
\_ CSIBM277
\_ EBCDIC-CP-DK
\_ EBCDIC-CP-NO
\_ OSF10020115
IBM278
\_ CP278
\_ CSIBM278
\_ EBCDIC-CP-FI
\_ EBCDIC-CP-SE
\_ OSF10020116
IBM280
\_ CP280
\_ CSIBM280
\_ EBCDIC-CP-IT
\_ OSF10020118
IBM281
\_ CP281
\_ CSIBM281
\_ EBCDIC-JP-E
IBM284
\_ CP1079
\_ CP284
\_ CSIBM284
\_ EBCDIC-CP-ES
\_ OSF1002011C
IBM285
\_ CP285
\_ CSIBM285
\_ EBCDIC-CP-GB
\_ OSF1002011D
IBM290
\_ CP290
\_ CSIBM290
\_ EBCDIC-JP-KANA
\_ OSF10020122
IBM297
\_ CP1081
\_ CP297
\_ CSIBM297
\_ EBCDIC-CP-FR
\_ OSF10020129
IBM420
\_ CP420
\_ CSIBM420
\_ EBCDIC-CP-AR1
\_ OSF100201A4
IBM423
\_ CP423
\_ CSIBM423
\_ EBCDIC-CP-GR
IBM424
\_ CP424
\_ CSIBM424
\_ EBCDIC-CP-HE
\_ OSF100201A8
IBM437
\_ 437
\_ CP437
\_ CSPC8CODEPAGE437
\_ OSF100201B5
IBM4517
\_ CP4517
\_ CSIBM4517
\_ IBM-4517
IBM4899
\_ CP4899
\_ CSIBM4899
\_ IBM-4899
IBM4909
\_ CP4909
\_ CSIBM4909
\_ IBM-4909
IBM4971
\_ CP4971
\_ CSIBM4971
\_ IBM-4971
IBM500
\_ 500
\_ 500V1
\_ CP1084
\_ CP500
\_ CSIBM500
\_ EBCDIC-CP-BE
\_ EBCDIC-CP-CH
\_ OSF100201F4
IBM5347
\_ CP5347
\_ CSIBM5347
\_ IBM-5347
IBM803
\_ CP803
\_ CSIBM803
\_ IBM-803
IBM850
\_ 850
\_ CP850
\_ CSPC850MULTILINGUAL
\_ OSF10020352
IBM851
\_ 851
\_ CP851
\_ CSIBM851
IBM852
\_ 852
\_ CP852
\_ CSPCP852
\_ OSF10020354
IBM855
\_ 855
\_ CP855
\_ CSIBM855
\_ OSF10020357
IBM856
\_ 856
\_ CP856
\_ CSIBM856
\_ IBM-856
IBM857
\_ 857
\_ CP857
\_ CSIBM857
\_ OSF10020359
IBM858
\_ 858
\_ CP858
\_ CSPC858MULTILINGUAL
IBM860
\_ 860
\_ CP860
\_ CSIBM860
IBM861
\_ 861
\_ CP861
\_ CPIBM861
\_ OSF1002035D
IBM862
\_ 862
\_ CP862
\_ CSPC862LATINHEBREW
\_ OSF1002035E
IBM863
\_ 863
\_ CP863
\_ CSIBM863
\_ OSF1002035F
IBM864
\_ 864
\_ CP864
\_ CSIBM864
\_ OSF10020360
IBM865
\_ 865
\_ CP865
\_ CSIBM865
IBM866
\_ 866
\_ CP866
\_ CSIBM866
IBM866NAV
\_ 866NAV
\_ CP866NAV
IBM868
\_ CP868
\_ CP-AR
\_ CSIBM868
\_ OSF10020364
IBM869
\_ 869
\_ CP869
\_ CP-GR
\_ CSIBM869
\_ OSF10020365
IBM870
\_ CP870
\_ CSIBM870
\_ EBCDIC-CP-ROECE
\_ EBCDIC-CP-YU
\_ OSF10020366
IBM871
\_ CP871
\_ CSIBM871
\_ EBCDIC-CP-IS
\_ OSF10020367
IBM874
\_ 874
\_ CP874
\_ WINDOWS-874
IBM875
\_ CP875
\_ EBCDIC-GREEK
\_ OSF1002036B
IBM880
\_ CP880
\_ CSIBM880
\_ EBCDIC-CYRILLIC
\_ OSF10020370
IBM891
\_ CP891
\_ CSIBM891
\_ OSF1002037B
IBM901
\_ CP901
\_ CSIBM901
\_ IBM-901
IBM902
\_ CP902
\_ CSIBM902
\_ IBM-902
IBM903
\_ CP903
\_ CSIBM903
\_ OSF10020387
IBM9030
\_ CP9030
\_ CSIBM9030
\_ IBM-9030
IBM904
\_ 904
\_ CP904
\_ CSIBM904
\_ OSF10020388
IBM905
\_ CP905
\_ CSIBM905
\_ EBCDIC-CP-TR
IBM9066
\_ CP9066
\_ CSIBM9066
\_ IBM-9066
IBM918
\_ CP918
\_ CSIBM918
\_ EBCDIC-CP-AR2
\_ OSF10020396
IBM921
\_ CP921
\_ CSIBM921
\_ IBM-921
IBM922
\_ CP922
\_ CSIBM922
\_ IBM-922
IBM930
\_ CP930
\_ CSIBM930
\_ IBM-930
IBM932
\_ CSIBM932
\_ IBM-932
IBM933
\_ CP933
\_ CSIBM933
\_ IBM-933
IBM935
\_ CP935
\_ CSIBM935
\_ IBM-935
IBM937
\_ CP937
\_ CSIBM937
\_ IBM-937
IBM939
\_ CP939
\_ CSIBM939
\_ IBM-939
IBM943
\_ CSIBM943
\_ IBM-943
IBM9448
\_ CP9448
\_ CSIBM9448
\_ IBM-9448
IEC_P27-1
\_ CSISO143IECP271
\_ IEC_P271
\_ ISO-IR-143
INIS
\_ CSISO49INIS
\_ ISO-IR-49
INIS-8
\_ CSISO50INIS8
\_ INIS8
\_ ISO-IR-50
INIS-CYRILLIC
\_ CSISO51INISCYRILLIC
\_ INISCYRILLIC
\_ ISO-IR-51
ISIRI-3342
\_ ISIRI3342
ISO_10367-BOX
\_ CSISO10367BOX
\_ ISO_10367BOX
\_ ISO-IR-155
ISO-10646/UCS2
\_ OSF00010100
\_ OSF00010101
\_ OSF00010102
\_ UCS-2BE
\_ UCS-2
\_ UCS2
\_ UCS-2LE
\_ UNICODEBIG
\_ UNICODELITTLE
ISO-10646/UCS4
\_ 10646-1:1993
\_ 10646-1:1993/UCS4
\_ CSUCS4
\_ ISO-10646
\_ OSF00010104
\_ OSF00010105
\_ OSF00010106
\_ UCS-4BE
\_ UCS-4
\_ UCS4
ISO-10646/UTF8
\_ ISO-10646/UTF-8
\_ ISO-IR-193
\_ OSF05010001
\_ UTF-8
\_ UTF8
ISO_11548-1
\_ ISO11548-1
\_ ISO/TR_11548-1/
ISO-2022-CN
\_ CSISO2022CN
\_ ISO2022CN
ISO-2022-CN-EXT
\_ ISO2022CNEXT
ISO-2022-JP
\_ CSISO2022JP
\_ ISO2022JP
ISO-2022-JP-2
\_ CSISO2022JP2
\_ ISO2022JP2
ISO-2022-KR
\_ CSISO2022KR
\_ ISO2022KR
ISO_2033
\_ CSISO2033
\_ E13B
\_ ISO_2033-1983
\_ ISO-IR-98
ISO_5427
\_ CSISO5427CYRILLIC
\_ ISO-IR-37
\_ KOI-7
ISO_5427-EXT
\_ CSISO5427CYRILLIC1981
\_ ISO_5427:1981
\_ ISO_5427EXT
\_ ISO-IR-54
ISO_5428
\_ CSISO5428GREEK
\_ ISO_5428:1980
\_ ISO-IR-55
ISO_6937
\_ ISO_6937:1992
\_ ISO6937
\_ ISO-IR-156
ISO_6937-2
\_ CSISO90
\_ ISO_6937-2:1983
\_ ISO_69372
\_ ISO-IR-90
ISO-8859-1
\_ 8859_1
\_ CP819
\_ CSISOLATIN1
\_ IBM819
\_ ISO_8859-1:1987
\_ ISO_8859-1
\_ ISO8859-1
\_ ISO88591
\_ ISO-IR-100
\_ L1
\_ LATIN1
\_ OSF00010001
ISO-8859-10
\_ CSISOLATIN6
\_ ISO_8859-10:1992
\_ ISO_8859-10
\_ ISO8859-10
\_ ISO885910
\_ ISO-IR-157
\_ L6
\_ LATIN6
\_ OSF0001000A
ISO-8859-11
\_ ISO8859-11
\_ ISO885911
ISO-8859-13
\_ BALTIC
\_ ISO8859-13
\_ ISO885913
\_ ISO-IR-179
\_ L7
\_ LATIN7
ISO-8859-14
\_ ISO_8859-14:1998
\_ ISO_8859-14
\_ ISO8859-14
\_ ISO885914
\_ ISO-CELTIC
\_ ISO-IR-199
\_ L8
\_ LATIN8
ISO-8859-15
\_ ISO_8859-15:1998
\_ ISO_8859-15
\_ ISO8859-15
\_ ISO885915
\_ ISO-IR-203
\_ LATIN-9
\_ LATIN9
ISO-8859-16
\_ ISO_8859-16:2001
\_ ISO_8859-16
\_ ISO8859-16
\_ ISO885916
\_ ISO-IR-226
\_ L10
\_ LATIN10
ISO-8859-2
\_ 8859_2
\_ CP912
\_ CSISOLATIN2
\_ IBM912
\_ ISO_8859-2:1987
\_ ISO_8859-2
\_ ISO8859-2
\_ ISO88592
\_ ISO-IR-101
\_ L2
\_ LATIN2
\_ OSF00010002
ISO-8859-3
\_ 8859_3
\_ CSISOLATIN3
\_ ISO_8859-3:1988
\_ ISO_8859-3
\_ ISO8859-3
\_ ISO88593
\_ ISO-IR-109
\_ L3
\_ LATIN3
\_ OSF00010003
ISO-8859-4
\_ 8859_4
\_ CSISOLATIN4
\_ ISO_8859-4:1988
\_ ISO_8859-4
\_ ISO8859-4
\_ ISO88594
\_ ISO-IR-110
\_ L4
\_ LATIN4
\_ OSF00010004
ISO-8859-5
\_ 8859_5
\_ CP915
\_ CSISOLATINCYRILLIC
\_ CYRILLIC
\_ IBM915
\_ ISO_8859-5:1988
\_ ISO_8859-5
\_ ISO8859-5
\_ ISO88595
\_ ISO-IR-144
\_ OSF00010005
ISO-8859-6
\_ 8859_6
\_ ARABIC
\_ ASMO-708
\_ CP1089
\_ CSISOLATINARABIC
\_ ECMA-114
\_ IBM1089
\_ ISO_8859-6:1987
\_ ISO_8859-6
\_ ISO8859-6
\_ ISO88596
\_ ISO-IR-127
\_ OSF00010006
ISO-8859-7
\_ 8859_7
\_ CP813
\_ CSISOLATINGREEK
\_ ECMA-118
\_ ELOT_928
\_ GREEK8
\_ GREEK
\_ IBM813
\_ ISO_8859-7:1987
\_ ISO_8859-7:2003
\_ ISO_8859-7
\_ ISO8859-7
\_ ISO88597
\_ ISO-IR-126
\_ OSF00010007
ISO-8859-8
\_ 8859_8
\_ CP916
\_ CSISOLATINHEBREW
\_ HEBREW
\_ IBM916
\_ ISO_8859-8:1988
\_ ISO_8859-8
\_ ISO8859-8
\_ ISO88598
\_ ISO-IR-138
\_ OSF00010008
ISO-8859-9
\_ 8859_9
\_ CP920
\_ CSISOLATIN5
\_ ECMA-128
\_ IBM920
\_ ISO_8859-9:1989
\_ ISO_8859-9
\_ ISO8859-9
\_ ISO88599
\_ ISO-IR-148
\_ L5
\_ LATIN5
\_ OSF00010009
\_ TS-5881
ISO-8859-9E
\_ ISO88599E
\_ ISO_8859-9E
\_ ISO8859-9E
IT
\_ CSISO15ITALIAN
\_ ISO646-IT
\_ ISO-IR-15
JIS_C6220-1969-RO
\_ CSISO14JISC6220RO
\_ ISO646-JP
\_ ISO-IR-14
\_ JIS_C62201969RO
\_ JP
JIS_C6229-1984-B
\_ CSISO92JISC62991984B
\_ ISO646-JP-OCR-B
\_ ISO-IR-92
\_ JIS_C62291984B
\_ JP-OCR-B
JOHAB
\_ CP1361
\_ MSCP1361
JUS_I.B1.002
\_ CSISO141JUSIB1002
\_ ISO646-YU
\_ ISO-IR-141
\_ JS
\_ YU
KOI-8
\_ KOI8
KOI8-R
\_ CSKOI8R
\_ KOI8R
KOI8-U
\_ KOI8U
KSC5636
\_ CSKSC5636
\_ ISO646-KR
LATIN-GREEK
\_ CSISO19LATINGREEK
\_ ISO-IR-19
\_ LATINGREEK
LATIN-GREEK-1
\_ CSISO27LATINGREEK1
\_ ISO-IR-27
\_ LATINGREEK1
MAC-CENTRALEUROPE
\_ CP1282
MACINTOSH
\_ CSMACINTOSH
\_ MAC
MAC-IS
\_ MACIS
MAC-UK
\_ MAC-CYRILLIC
\_ MACCYRILLIC
\_ MACUK
\_ MACUKRAINIAN
MSZ_7795.3
\_ CSISO86HUNGARIAN
\_ HU
\_ ISO646-HU
\_ ISO-IR-86
NATS-DANO
\_ CSNATSDANO
\_ ISO-IR-9-1
\_ NATSDANO
NATS-SEFI
\_ CSNATSSEFI
\_ ISO-IR-8-1
\_ NATSSEFI
NC_NC00-10
\_ CSISO151CUBA
\_ CUBA
\_ ISO646-CU
\_ ISO-IR-151
\_ NC_NC00-10:81
\_ NC_NC0010
NF_Z_62-010
\_ CSISO69FRENCH
\_ FR
\_ ISO646-FR
\_ ISO-IR-69
\_ NF_Z_62010
NF_Z_62-010_1973
\_ CSISO25FRENCH
\_ ISO646-FR1
\_ ISO-IR-25
\_ NF_Z_62010_1973
\_ NF_Z_62-010_(1973)
NS_4551-1
\_ CSISO60DANISHNORWEGIAN
\_ CSISO60NORWEGIAN1
\_ ISO646-NO
\_ ISO-IR-60
\_ NO
\_ NS_45511
NS_4551-2
\_ CSISO61NORWEGIAN2
\_ ISO646-NO2
\_ ISO-IR-61
\_ NO2
\_ NS_45512
PT
\_ CSISO16PORTUGESE
\_ ISO646-PT
\_ ISO-IR-16
PT2
\_ CSISO84PORTUGUESE2
\_ ISO646-PT2
\_ ISO-IR-84
RK1048
\_ STRK1048-2002
SEN_850200_B
\_ CSISO10SWEDISH
\_ FI
\_ ISO646-FI
\_ ISO646-SE
\_ ISO-IR-10
\_ SE
\_ SS636127
SEN_850200_C
\_ CSISO11SWEDISHFORNAMES
\_ ISO646-SE2
\_ ISO-IR-11
\_ SE2
Shift_JISX0213
\_ ShiftJISX0213
SJIS
\_ CSSHIFTJIS
\_ MS_KANJI
\_ SHIFT-JIS
\_ SHIFT_JIS
T.61-8BIT
\_ CSISO103T618BIT
\_ ISO-IR-103
\_ T.618BIT
\_ T.61
TCVN5712-1
\_ TCVN5712-1:1993
\_ TCVN-5712
\_ TCVN
TIS-620
\_ ISO-IR-166
\_ TIS620-0
\_ TIS620.2529-1
\_ TIS620.2533-0
\_ TIS620
UHC
\_ CP949
\_ MSCP949
\_ OSF100203B5
UNICODE
\_ CSUNICODE
UTF-16
\_ UTF16
UTF-16BE
\_ UTF16BE
UTF-16LE
\_ UTF16LE
UTF-32
\_ UTF32
UTF-32BE
\_ UTF32BE
UTF-32LE
\_ UTF32LE
UTF-7
\_ UTF7
WIN-SAMI-2
\_ WINSAMI2
\_ WS2
#!/usr/bin/env bash
#
# Print a one-level tree of every glibc character set and its aliases.
#
# Usage: print-glibc-charset-aliases.bash [GLIBC_SOURCE_DIR]
#
# GLIBC_SOURCE_DIR defaults to 'glibc', i.e. a glibc source tree located in
# the current working directory. Three files are read from it:
#   - iconvdata/gconv-modules
#   - iconvdata/gconv-modules-extra.conf
#   - iconv/gconv_builtin.h
#
# Output format (stdout):
#   TARGET
#    \_ ALIAS
#    \_ ALIAS
#   ...
#
# Targets, and the aliases below each target, are ordered by sort(1) using the
# current locale's collation; run with LC_ALL=C for byte-wise ordering.
#
# Everything is streamed through a single pipeline, so no scratch files are
# created in (or removed from) the working directory.

# -e is deliberately left out: the failures that matter are checked explicitly
# and the script's exit status is the status returned by main.
set -u

#######################################
# Convert the 'alias SOURCE TARGET' lines of gconv module files to CSV.
# Arguments: $@ - gconv module configuration files
# Outputs:   one 'SOURCE,TARGET' line per alias entry, on stdout
#######################################
extract_module_aliases() {
  # Drop every '//', trim trailing blanks (they would otherwise turn into a
  # stray trailing comma), then collapse each whitespace run into one comma.
  grep -h '^alias' -- "$@" \
    | sed -E \
      -e 's|//||g' \
      -e 's/[[:space:]]+$//' \
      -e 's/[[:space:]]+/,/g' \
      -e 's/^alias,//'
}

#######################################
# Convert the 'BUILTIN_ALIAS ("SOURCE", "TARGET")' lines of a header to CSV.
# Arguments: $1 - path to gconv_builtin.h
# Outputs:   one 'SOURCE,TARGET' line per alias entry, on stdout
#######################################
extract_builtin_aliases() {
  # - Lines mentioning INTERNAL are skipped.
  # - Any amount of whitespace (space or tab) is accepted after the macro name
  #   and after the comma separating the two strings.
  # - '//' and a single trailing '/' are stripped from both names.
  # - Lines whose target is UNICODELITTLE or UNICODEBIG are skipped, as these
  #   are the only redundant aliases.
  grep 'BUILTIN_ALIAS' -- "$1" \
    | sed -E \
      -e '/INTERNAL/d' \
      -e 's/BUILTIN_ALIAS[[:space:]]*\("(.*)",[[:space:]]*"(.*)"\).*/\1,\2/' \
      -e 's|//||g' -e 's|/$||' -e 's|/,|,|g' \
      -e '/UNICODELITTLE$/d' -e '/UNICODEBIG$/d'
}

#######################################
# Render 'SOURCE,TARGET' lines as a tree that is one level deep.
# Inputs:  'SOURCE,TARGET' lines on stdin
# Outputs: each distinct TARGET on a line of its own, directly followed by one
#          ' \_ SOURCE' line per alias of that target, on stdout
#######################################
render_alias_tree() {
  # The keys are limited to exactly one field each (-k2,2 -k1,1): an open-ended
  # '-k1' would compare 'SOURCE,TARGET' as a whole, letting the target name
  # influence the order of the aliases.
  #
  # awk groups by target through an array instead of relying on equal targets
  # being adjacent, and emits the targets in the order they are first seen.
  sort -t, -k2,2 -k1,1 \
    | awk -F, '
        !($2 in seen) { seen[$2] = 1; targets[++count] = $2 }
        { aliases[$2] = aliases[$2] " \\_ " $1 "\n" }
        END {
          for (i = 1; i <= count; i++) {
            printf "%s\n%s", targets[i], aliases[targets[i]]
          }
        }
      '
}

#######################################
# Entry point: validate the inputs, then print the alias tree.
# Arguments: $1 - glibc source directory (optional, default: 'glibc')
# Outputs:   the alias tree on stdout; diagnostics on stderr
# Returns:   0 on success, 1 if an input file is missing or unreadable,
#            otherwise the status of the rendering pipeline
#######################################
main() {
  local glibc_dir=${1:-glibc}
  local -a module_files=(
    "${glibc_dir}/iconvdata/gconv-modules"
    "${glibc_dir}/iconvdata/gconv-modules-extra.conf"
  )
  local builtin_header="${glibc_dir}/iconv/gconv_builtin.h"

  # Fail early and loudly instead of printing an empty or partial tree.
  local input
  for input in "${module_files[@]}" "$builtin_header"; do
    if [[ ! -f "$input" || ! -r "$input" ]]; then
      printf '%s: cannot read input file: %s\n' "${0##*/}" "$input" >&2
      return 1
    fi
  done

  {
    extract_module_aliases "${module_files[@]}"
    extract_builtin_aliases "$builtin_header"
  } | render_alias_tree
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment