Created
January 9, 2026 21:44
-
-
Save jonchurch/7d5d6efbc787007aca514b53320bac04 to your computer and use it in GitHub Desktop.
DBCS Edge Case Testing: iconv-lite vs TextDecoder vs WHATWG Spec
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /** | |
| * DBCS Edge Case Testing: iconv-lite vs TextDecoder vs WHATWG Spec | |
| * | |
| * References: | |
| * - https://encoding.spec.whatwg.org/#shift_jis-decoder | |
| * - https://encoding.spec.whatwg.org/#big5-decoder | |
| * - https://encoding.spec.whatwg.org/#gbk-decoder | |
| * - https://encoding.spec.whatwg.org/#euc-kr-decoder | |
| */ | |
| // const iconv = require("iconv-lite") | |
| const iconv = require("./") | |
// Decode cases under comparison. Each row is
// [encoding label, input bytes, expected output as "U+XXXX" labels, reason],
// and is expanded below into an object with those four named properties.
const tests = [
  [
    "shift_jis",
    [0x80],
    "U+0080",
    "Spec: 'If byte is an ASCII byte or 0x80, return a code point whose value is byte'"
  ],
  [
    "shift_jis",
    [0x82, 0x41],
    "U+FFFD U+0041",
    "Spec: 'If byte is an ASCII byte, restore byte to ioQueue. Return error.'"
  ],
  [
    "big5",
    [0x80],
    "U+FFFD",
    "Lead byte range is 0x81-0xFE; 0x80 is outside range"
  ],
  [
    "gbk",
    [0xFF],
    "U+FFFD",
    "Lead byte range is 0x81-0xFE; 0xFF is outside range"
  ],
  [
    "euc-kr",
    [0xB0, 0x41],
    "U+CE9A",
    "Trail byte range is 0x41-0xFE; 0x41 is valid and maps to U+CE9A"
  ]
].map(([encoding, bytes, expected, reason]) => ({ encoding, bytes, expected, reason }))
/**
 * Render a string as space-separated "U+XXXX" code point labels.
 *
 * The string is walked by code point and each value is read with
 * codePointAt, so a character outside the BMP yields a single label such as
 * "U+20000" rather than the value of its high surrogate.
 *
 * @param {string} str - Text to describe.
 * @returns {string} Uppercase hex labels (at least four digits each) joined
 *   by single spaces; an empty string for empty input.
 */
function toCodePoints (str) {
  return Array.from(str, ch => "U+" + ch.codePointAt(0).toString(16).toUpperCase().padStart(4, "0"))
    .join(" ")
}
/**
 * Format a byte sequence as a bracketed list of hex literals, e.g.
 * "[0x82, 0x41]".
 *
 * Each byte is zero-padded to two hex digits so small values read as bytes
 * ("0x0A", not "0xA"). Array.from is used instead of bytes.map so that a
 * Buffer or Uint8Array argument is also formatted correctly (a typed array's
 * own map would coerce the label strings back to numbers).
 *
 * @param {ArrayLike<number>|Iterable<number>} bytes - Byte values to format.
 * @returns {string} The formatted list; "[]" for an empty sequence.
 */
function formatBytes (bytes) {
  const labels = Array.from(bytes, b => "0x" + b.toString(16).toUpperCase().padStart(2, "0"))
  return "[" + labels.join(", ") + "]"
}
// For every case, decode the bytes with iconv.decode and with the platform
// TextDecoder, then print both results next to the expected code points.
for (const { encoding, bytes, expected, reason } of tests) {
  const input = Buffer.from(bytes)
  const fromIconv = toCodePoints(iconv.decode(input, encoding))
  const fromTextDecoder = toCodePoints(new TextDecoder(encoding).decode(input))

  // Check mark when the rendered code points equal the expected string, cross otherwise
  const verdict = actual => (actual === expected ? " ✓" : " ✗")

  console.log(`${encoding.toUpperCase()} ${formatBytes(bytes)}`)
  console.log(` Expected: ${expected} (${reason})`)
  console.log(` iconv-lite: ${fromIconv}${verdict(fromIconv)}`)
  console.log(` TextDecoder: ${fromTextDecoder}${verdict(fromTextDecoder)}`)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment