Created
June 15, 2016 08:20
-
-
Save leidegre/8c117f7302d97ef2d05f5782d65bc09a to your computer and use it in GitHub Desktop.
Figure out what casing various Unicode code points are mapped to by JavaScript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This program only maps the Basic Latin and Latin-1 Supplement code ranges, but it can be extended to support anything
/**
 * Build a mapping triple from a [source, target] pair of character codes.
 *
 * @param {number[]} a - Pair [source, target].
 * @returns {number[]} [source, target, delta] where delta = target - source.
 */
function mkInterval(a) {
  const [source, target] = a
  return [source, target, target - source]
}
/**
 * Tell whether mapping triple `b` directly follows mapping triple `a`.
 *
 * Both arguments are [source, target, delta] triples. They are adjacent when
 * b's source and target are each exactly one greater than a's and both carry
 * the same delta. Comparisons are strict, so no type coercion takes place.
 *
 * @param {number[]} a - Earlier triple.
 * @param {number[]} b - Later triple.
 * @returns {boolean} true when `b` continues the run that `a` belongs to.
 */
function adj(a, b) {
  return a[0] + 1 === b[0]
    && a[1] + 1 === b[1]
    && a[2] === b[2]
}
// Collect every character code in the scanned range whose lowercase form
// differs from itself, as [code, lowerCode, delta] triples in ascending order.
const map = []
for (let i = 0x20; i <= 0xFF; i++) { // Basic Latin and Latin-1 Supplement
  const a = String.fromCharCode(i)
  // NOTE: only the first UTF-16 code unit of the lowercased string is looked
  // at, so a wider range whose lowercase forms span several units needs more care.
  const b = a.toLowerCase().charCodeAt(0)
  if (i !== b) {
    map.push(mkInterval([i, b]))
  }
}

// Find all continuous intervals: collapse each run of adjacent triples into a
// single [first, last, delta] entry, where first and last are inclusive source
// character codes and delta is the offset to the lowercase code.
const cont = []
let prev // previously visited triple; undefined until the first iteration
for (const cur of map) {
  if (prev !== undefined && adj(prev, cur)) {
    // Same run: move the inclusive end of the most recent interval forward.
    cont[cont.length - 1][1] = cur[0]
  } else {
    // New run: start from a fresh array whose end is the source code itself,
    // so one-element runs have the same shape as merged ones and the triples
    // stored in `map` are never mutated.
    cont.push([cur[0], cur[0], cur[2]])
  }
  prev = cur
}

//DEBUG
// cont.forEach((c) => {
//   const xs = []
//   for (let i = c[0]; i <= c[1]; i++) { // c[1] is inclusive
//     xs.push(`${String.fromCharCode(i)} -> ${String.fromCharCode(i + c[2])}`)
//   }
//   console.log(xs.join(', '))
// })

console.log(cont)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment