Skip to content

Instantly share code, notes, and snippets.

@jonchurch
Created January 9, 2026 21:44
Show Gist options
  • Select an option

  • Save jonchurch/7d5d6efbc787007aca514b53320bac04 to your computer and use it in GitHub Desktop.

Select an option

Save jonchurch/7d5d6efbc787007aca514b53320bac04 to your computer and use it in GitHub Desktop.
DBCS Edge Case Testing: iconv-lite vs TextDecoder vs WHATWG Spec
/**
* DBCS Edge Case Testing: iconv-lite vs TextDecoder vs WHATWG Spec
*
* References:
* - https://encoding.spec.whatwg.org/#shift_jis-decoder
* - https://encoding.spec.whatwg.org/#big5-decoder
* - https://encoding.spec.whatwg.org/#gbk-decoder
* - https://encoding.spec.whatwg.org/#euc-kr-decoder
*/
// const iconv = require("iconv-lite")
const iconv = require("./")
// Edge-case table. Each row is: encoding label, input bytes, the expected
// decoded output written as space-separated "U+XXXX" labels, and the
// rationale for that expectation. Rows are expanded into objects with the
// keys `encoding`, `bytes`, `expected` and `reason`.
const tests = [
  [
    "shift_jis",
    [0x80],
    "U+0080",
    "Spec: 'If byte is an ASCII byte or 0x80, return a code point whose value is byte'"
  ],
  [
    "shift_jis",
    [0x82, 0x41],
    "U+FFFD U+0041",
    "Spec: 'If byte is an ASCII byte, restore byte to ioQueue. Return error.'"
  ],
  [
    "big5",
    [0x80],
    "U+FFFD",
    "Lead byte range is 0x81-0xFE; 0x80 is outside range"
  ],
  [
    "gbk",
    [0xFF],
    "U+FFFD",
    "Lead byte range is 0x81-0xFE; 0xFF is outside range"
  ],
  [
    "euc-kr",
    [0xB0, 0x41],
    "U+CE9A",
    "Trail byte range is 0x41-0xFE; 0x41 is valid and maps to U+CE9A"
  ]
].map(([encoding, bytes, expected, reason]) => ({ encoding, bytes, expected, reason }))
/**
 * Render a string as space-separated "U+XXXX" code point labels.
 *
 * The string is split by code point, so a surrogate pair yields one label
 * (e.g. "U+20000"). Hex digits are upper-case and zero-padded to at least
 * four digits.
 *
 * @param {string} str - Text to describe.
 * @returns {string} Labels joined by single spaces; "" for an empty string.
 */
function toCodePoints (str) {
  return [...str]
    // codePointAt (not charCodeAt) so an astral character reports its full
    // code point rather than only its high surrogate.
    .map(c => "U+" + c.codePointAt(0).toString(16).toUpperCase().padStart(4, "0"))
    .join(" ")
}
/**
 * Render a list of byte values as a bracketed, comma-separated list of
 * upper-case hex literals, e.g. "[0x82, 0x41]". Values are not zero-padded.
 *
 * @param {number[]} bytes - Byte values to display.
 * @returns {string} The formatted list; "[]" when `bytes` is empty.
 */
function formatBytes (bytes) {
  const hexLabels = bytes.map(value => `0x${value.toString(16).toUpperCase()}`)
  return `[${hexLabels.join(", ")}]`
}
// Decode each case with iconv-lite and with the built-in TextDecoder, then
// print both results next to the expected output, marked ✓ on an exact match
// and ✗ otherwise.
tests.forEach(({ encoding, bytes, expected, reason }) => {
  const input = Buffer.from(bytes)
  const iconvResult = toCodePoints(iconv.decode(input, encoding))
  const decoderResult = toCodePoints(new TextDecoder(encoding).decode(input))
  const mark = actual => (actual === expected ? " ✓" : " ✗")

  console.log(`${encoding.toUpperCase()} ${formatBytes(bytes)}`)
  console.log(` Expected: ${expected} (${reason})`)
  console.log(` iconv-lite: ${iconvResult}${mark(iconvResult)}`)
  console.log(` TextDecoder: ${decoderResult}${mark(decoderResult)}`)
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment