Created
April 11, 2025 21:49
-
-
Save vicapow/829bdb5976341d2a866dddeaf4a336b0 to your computer and use it in GitHub Desktop.
Generate a numeric Unicode character table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Generates a copy-paste-ready JavaScript object literal describing every
 * Unicode character that carries numeric information.
 *
 * The script downloads `UnicodeData.txt` and `Scripts.txt` from the Unicode
 * Character Database, keeps each code point that is a decimal digit (general
 * category `Nd`) or has a decimal/numeric value, and prints one line per
 * character via `console.log`:
 *
 *   let unicodeData = {
 *    '<char>': { script: '<Script>', decimalDigit: <bool>, decimalValue: <n>, numeric: <n> },
 *   };
 *
 * Everything lives inside the IIFE so that pasting the script into a console
 * does not leak helper names into the global scope.
 */
(async () => {
  /**
   * Downloads a text resource and rejects on a non-2xx response, so an HTTP
   * error page is never parsed as if it were UCD data.
   *
   * @param {string} url - Absolute URL of the file to download.
   * @returns {Promise<string>} The response body as text.
   * @throws {Error} When the response status is not OK.
   */
  const fetchText = async (url) => {
    const res = await fetch(url);
    if (!res.ok) {
      throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
    }
    return res.text();
  };

  /**
   * Parses the contents of Scripts.txt into a list of code point ranges.
   *
   * @param {string} scriptsTxt - Raw contents of Scripts.txt.
   * @returns {{start: number, end: number, script: string}[]} Inclusive ranges.
   */
  const parseScriptRanges = (scriptsTxt) => {
    const ranges = [];
    for (const line of scriptsTxt.split(/\r?\n/)) {
      // Everything after '#' is a comment in UCD files.
      const cleaned = line.replace(/#.*/, '').trim();
      if (!cleaned) continue;
      const [rangePart = '', script = ''] = cleaned.split(';').map((s) => s.trim());
      // A line is either a single code point ("00AA") or a range ("0041..005A").
      const [startHex, endHex = startHex] = rangePart.split('..');
      const start = Number.parseInt(startHex, 16);
      const end = Number.parseInt(endHex, 16);
      // Skip anything that is not a well-formed "<range> ; <script>" record.
      if (Number.isNaN(start) || Number.isNaN(end) || !script) continue;
      ranges.push({ start, end, script });
    }
    return ranges;
  };

  /**
   * Finds the script of a code point by scanning the parsed ranges.
   *
   * @param {{start: number, end: number, script: string}[]} ranges
   * @param {number} codePoint
   * @returns {string} The script name, or 'Unknown' when no range matches.
   */
  const findScript = (ranges, codePoint) =>
    ranges.find(({ start, end }) => codePoint >= start && codePoint <= end)?.script ?? 'Unknown';

  /**
   * Converts a UnicodeData.txt numeric field to a number.
   *
   * @param {string} raw - Either an integer ("5") or a fraction ("1/4").
   * @returns {number}
   */
  const parseNumericValue = (raw) => {
    if (!raw.includes('/')) return Number(raw);
    const [numerator, denominator] = raw.split('/').map(Number);
    return numerator / denominator;
  };

  /**
   * Builds the character → properties table from UnicodeData.txt.
   *
   * @param {string} unicodeDataTxt - Raw contents of UnicodeData.txt.
   * @param {{start: number, end: number, script: string}[]} scriptRanges
   * @returns {Map<string, object>} Entries in file order, keyed by character.
   */
  const buildNumericTable = (unicodeDataTxt, scriptRanges) => {
    const table = new Map();
    for (const line of unicodeDataTxt.split(/\r?\n/)) {
      if (!line.trim()) continue;
      // Field 0: code point, 2: general category, 6: decimal value,
      // 8: numeric value. Defaults keep short lines from yielding NaN values.
      const [codeHex, , category, , , , decimal = '', , numeric = ''] = line.split(';');
      const codePoint = Number.parseInt(codeHex, 16);
      // String.fromCodePoint throws on NaN, so drop unparsable lines here.
      if (Number.isNaN(codePoint)) continue;

      const decimalDigit = category === 'Nd';
      // Only include useful entries. Checking before the script lookup avoids
      // scanning every range for the vast majority of (non-numeric) lines.
      if (!decimalDigit && decimal === '' && numeric === '') continue;

      // Property insertion order here is the order printed in the output.
      const entry = { script: findScript(scriptRanges, codePoint), decimalDigit };
      if (decimal !== '') entry.decimalValue = Number(decimal);
      if (numeric !== '') entry.numeric = parseNumericValue(numeric);
      table.set(String.fromCodePoint(codePoint), entry);
    }
    return table;
  };

  /**
   * Renders one property value as JavaScript source text.
   * Strings (the script name) are wrapped in single quotes; others print as-is.
   */
  const formatValue = (value) => (typeof value === 'string' ? `'${value}'` : String(value));

  // The two downloads are independent, so run them in parallel.
  const [unicodeDataTxt, scriptsTxt] = await Promise.all([
    fetchText('https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt'),
    fetchText('https://www.unicode.org/Public/UCD/latest/ucd/Scripts.txt'),
  ]);

  const unicodeData = buildNumericTable(unicodeDataTxt, parseScriptRanges(scriptsTxt));

  // Output copy-paste-ready JS
  console.log('let unicodeData = {');
  for (const [char, data] of unicodeData) {
    const props = Object.entries(data)
      .map(([k, v]) => `${k}: ${formatValue(v)}`)
      .join(', ');
    console.log(` '${char}': { ${props} },`);
  }
  console.log('};');
})().catch((err) => {
  // Report download/parse failures instead of leaving a floating rejection.
  console.error('Failed to generate the numeric Unicode table:', err);
  // Preserve a failing exit status when run under Node; no-op in browsers.
  if (typeof process !== 'undefined') process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment