\xA7 |
\xE9 |
|
---|---|---|
ASCII-8BIT | (Encoding::UndefinedConversionError) "\xA7" from ASCII-8BIT to UTF-8 |
(Encoding::UndefinedConversionError) "\xE9" from ASCII-8BIT to UTF-8 |
Big5 | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on Big5 |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on Big5 |
Big5-HKSCS | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on Big5-HKSCS |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on Big5-HKSCS |
Big5-UAO | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on Big5-UAO |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on Big5-UAO |
CESU-8 | (Encoding::InvalidByteSequenceError) "\xA7" on CESU-8 |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on CESU-8 |
CP51932 | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on CP51932 |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on CP51932 |
CP850 | º |
Ú |
CP852 | ž |
Ú |
CP855 | Д |
ж |
CP949 | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on CP949 |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on CP949 |
CP950 | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on CP950 |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on CP950 |
CP951 | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on CP951 |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on CP951 |
EUC-JIS-2004 | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on EUC-JIS-2004 |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on EUC-JIS-2004 |
EUC-JP | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on EUC-JP |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on EUC-JP |
EUC-KR | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on EUC-KR |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on EUC-KR |
EUC-TW | (Encoding::ConverterNotFoundError) code converter not found (EUC-TW to UTF-8) |
(Encoding::ConverterNotFoundError) code converter not found (EUC-TW to UTF-8) |
Emacs-Mule | (Encoding::ConverterNotFoundError) code converter not found (Emacs-Mule to UTF-8) |
(Encoding::ConverterNotFoundError) code converter not found (Emacs-Mule to UTF-8) |
GB12345 | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on GB12345 |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on GB12345 |
GB18030 | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on GB18030 |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on GB18030 |
GB1988 | (Encoding::ConverterNotFoundError) code converter not found (GB1988 to UTF-8) |
(Encoding::ConverterNotFoundError) code converter not found (GB1988 to UTF-8) |
GB2312 | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on GB2312 |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on GB2312 |
GBK | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on GBK |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on GBK |
IBM437 | º |
Θ |
IBM720 | د |
ل |
IBM737 | π |
ώ |
IBM775 | ¦ |
ķ |
IBM852 | ž |
Ú |
IBM855 | Д |
ж |
IBM857 | ğ |
Ú |
IBM860 | º |
Θ |
IBM861 | Ú |
Θ |
IBM862 | º |
Θ |
IBM863 | ¯ |
Θ |
IBM864 | (Encoding::ConverterNotFoundError) code converter not found (IBM864 to UTF-8) |
(Encoding::ConverterNotFoundError) code converter not found (IBM864 to UTF-8) |
IBM865 | º |
Θ |
IBM866 | з |
щ |
IBM869 | Δ |
ο |
ISO-8859-1 | § |
é |
ISO-8859-10 | § |
é |
ISO-8859-11 | ง |
้ |
ISO-8859-13 | § |
é |
ISO-8859-14 | § |
é |
ISO-8859-15 | § |
é |
ISO-8859-16 | § |
é |
ISO-8859-2 | § |
é |
ISO-8859-3 | § |
é |
ISO-8859-4 | § |
é |
ISO-8859-5 | Ї |
щ |
ISO-8859-6 | (Encoding::UndefinedConversionError) "\xA7" from ISO-8859-6 to UTF-8 |
ى |
ISO-8859-7 | § |
ι |
ISO-8859-8 | § |
י |
ISO-8859-9 | § |
é |
KOI8-R | ╖ |
И |
KOI8-U | ї |
И |
MacJapanese | (Encoding::ConverterNotFoundError) code converter not found (MacJapanese to UTF-8) |
(Encoding::ConverterNotFoundError) code converter not found (MacJapanese to UTF-8) |
SJIS-DoCoMo | ｧ |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on SJIS-DOCOMO |
SJIS-KDDI | ｧ |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on SJIS-KDDI |
SJIS-SoftBank | ｧ |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on SJIS-SoftBank |
Shift_JIS | ｧ |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on Shift_JIS |
TIS-620 | ง |
้ |
US-ASCII | (Encoding::InvalidByteSequenceError) "\xA7" on US-ASCII |
(Encoding::InvalidByteSequenceError) "\xE9" on US-ASCII |
UTF-16BE | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on UTF-16BE |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on UTF-16BE |
UTF-16LE | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on UTF-16LE |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on UTF-16LE |
UTF-32BE | (Encoding::InvalidByteSequenceError) "\xA7" on UTF-32BE |
(Encoding::InvalidByteSequenceError) "\xE9" on UTF-32BE |
UTF-32LE | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on UTF-32LE |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on UTF-32LE |
UTF-8 | ß |
È |
UTF8-DoCoMo | (Encoding::InvalidByteSequenceError) "\xA7" on UTF8-DoCoMo |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on UTF8-DoCoMo |
UTF8-KDDI | (Encoding::InvalidByteSequenceError) "\xA7" on UTF8-KDDI |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on UTF8-KDDI |
UTF8-MAC | (Encoding::InvalidByteSequenceError) "\xA7" on UTF8-MAC |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on UTF8-MAC |
UTF8-SoftBank | (Encoding::InvalidByteSequenceError) "\xA7" on UTF8-SoftBank |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on UTF8-SoftBank |
Windows-1250 | § |
é |
Windows-1251 | § |
й |
Windows-1252 | § |
é |
Windows-1253 | § |
ι |
Windows-1254 | § |
é |
Windows-1255 | § |
י |
Windows-1256 | § |
é |
Windows-1257 | § |
é |
Windows-1258 | (Encoding::ConverterNotFoundError) code converter not found (Windows-1258 to UTF-8) |
(Encoding::ConverterNotFoundError) code converter not found (Windows-1258 to UTF-8) |
Windows-31J | ｧ |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on Windows-31J |
Windows-874 | ง |
้ |
eucJP-ms | (Encoding::InvalidByteSequenceError) incomplete "\xA7" on eucJP-ms |
(Encoding::InvalidByteSequenceError) incomplete "\xE9" on eucJP-ms |
macCentEuro | (Encoding::ConverterNotFoundError) code converter not found (macCentEuro to UTF-8) |
(Encoding::ConverterNotFoundError) code converter not found (macCentEuro to UTF-8) |
macCroatian | ß |
È |
macCyrillic | І |
й |
macGreek | ß |
ι |
macIceland | ß |
È |
macRoman | ß |
È |
macRomania | ß |
È |
macThai | (Encoding::ConverterNotFoundError) code converter not found (macThai to UTF-8) |
(Encoding::ConverterNotFoundError) code converter not found (macThai to UTF-8) |
macTurkish | ß |
È |
macUkraine | І |
й |
stateless-ISO-2022-JP | (Encoding::InvalidByteSequenceError) "\xA7" on stateless-ISO-2022-JP |
(Encoding::InvalidByteSequenceError) "\xE9" on stateless-ISO-2022-JP |
stateless-ISO-2022-JP-KDDI | (Encoding::InvalidByteSequenceError) "\xA7" on stateless-ISO-2022-JP-KDDI |
(Encoding::InvalidByteSequenceError) "\xE9" on stateless-ISO-2022-JP-KDDI |
Created
February 23, 2023 19:51
-
-
Save yaauie/bccc4a5e4c69d1754562459068bd44bc to your computer and use it in GitHub Desktop.
A Ruby-based encoding guesser that outputs a GitHub-flavored Markdown table of all the possible encoding interpretations for one or more byte sequences.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Encoding guesser: prints a GitHub-flavored-markdown table showing, for every
# non-dummy encoding Ruby knows, what each input byte sequence becomes when it
# is interpreted in that encoding and transcoded to UTF-8 (or the error raised).

# Transcodes +input+ to UTF-8, interpreting its bytes as +source_encoding+.
#
# String#encode performs no conversion (and therefore no validation) when the
# source and destination encodings are the same, so for the UTF-8 row invalid
# bytes would be returned untouched and end up corrupting the output stream.
# When that happens we force a real conversion so Ruby raises the same precise
# error it reports for every other encoding.
#
# @param input [String] the byte sequence under test
# @param source_encoding [Encoding] the encoding to interpret the bytes as
# @return [String] the valid UTF-8 result
# @raise [EncodingError] when the bytes cannot be converted
def transcode_to_utf8(input, source_encoding)
  transcoded = input.encode(Encoding::UTF_8, source_encoding)
  return transcoded if transcoded.valid_encoding?

  # Only reachable via the no-op path; converting to a different target
  # actually walks the bytes and raises on the offending sequence.
  input.encode(Encoding::UTF_16BE, source_encoding)
  transcoded
end

# Builds the text of one table cell: the transcoded string on success, or
# "(ErrorClass) message" when the conversion fails.
#
# @param input [String] the byte sequence under test
# @param source_encoding [Encoding] the encoding to interpret the bytes as
# @return [String] the cell contents (without the surrounding backticks)
def table_cell(input, source_encoding)
  transcode_to_utf8(input, source_encoding)
rescue EncodingError => e
  "(#{e.class}) #{e.message}"
end

# provide our input strings, using ruby's backslash + lower-x + upper-hex + upper-hex
# to represent each non-printable or non-lower-ascii byte:
inputs = [
  "\xA7",
  "\xE9",
].map do |input|
  # we "force" the encoding to be UTF-8,
  # effectively marking it _as_ UTF-8 without
  # changing the bytes it contains; the literal is
  # duplicated first so this also works when string
  # literals are frozen
  input.dup.force_encoding(Encoding::UTF_8).freeze
end.freeze

out = $stdout

# we then build a github-flavored-markdown table using _all_ of the non-dummy encodings
# available to ruby, with the results of the transcode operation as the column value
# (whether it be an exception or a valid result)
out.write "| | `#{inputs.map(&:dump).map { |d| d[1...-1] }.join('` | `')}` |\n"
out.write "| --- | #{inputs.map { |_| '---' }.join(' | ')} |\n"

Encoding.list.reject(&:dummy?).sort_by(&:name).each do |encoding|
  out.write "| #{encoding.name} | "
  inputs.each do |input|
    out.write "`#{table_cell(input, encoding)}` |"
  end
  out.write "\n"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment