Skip to content

Instantly share code, notes, and snippets.

@vicapow
Created April 11, 2025 21:49
Show Gist options
  • Save vicapow/829bdb5976341d2a866dddeaf4a336b0 to your computer and use it in GitHub Desktop.
Save vicapow/829bdb5976341d2a866dddeaf4a336b0 to your computer and use it in GitHub Desktop.
Generate a table of numeric Unicode characters
/**
 * Downloads a text resource and returns its body.
 *
 * @param {string} url - Absolute URL to fetch.
 * @returns {Promise<string>} The response body as text.
 * @throws {Error} When the server answers with a non-2xx status, so that an
 *   error page is never parsed as if it were Unicode data.
 */
async function fetchText(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: HTTP ${response.status} ${response.statusText}`);
  }
  return response.text();
}

/**
 * Parses the contents of Scripts.txt into code point ranges.
 *
 * Each data line looks like `0030..0039 ; Common # comment` or
 * `00AA ; Latin # comment`. Comments, blank lines and lines that do not yield
 * a script name and valid hexadecimal bounds are skipped.
 *
 * @param {string} scriptsTxt - Raw text of Scripts.txt.
 * @returns {{start: number, end: number, script: string}[]} Ranges sorted by
 *   `start`, ready for binary search with `findScript`.
 */
function parseScriptRanges(scriptsTxt) {
  const ranges = [];
  for (const rawLine of scriptsTxt.split('\n')) {
    const cleaned = rawLine.replace(/#.*/, '').trim();
    if (!cleaned) continue;
    const [rangePart, script] = cleaned.split(';').map((part) => part.trim());
    if (!script) continue;
    // A single code point ("00AA") is a range whose end equals its start.
    const [startHex, endHex = startHex] = rangePart.split('..');
    const start = Number.parseInt(startHex, 16);
    const end = Number.parseInt(endHex, 16);
    if (Number.isNaN(start) || Number.isNaN(end)) continue;
    ranges.push({ start, end, script });
  }
  // The file is grouped by script, not by code point, so sort before searching.
  return ranges.sort((a, b) => a.start - b.start);
}

/**
 * Looks up the script of a code point by binary search.
 *
 * @param {{start: number, end: number, script: string}[]} ranges - Ranges
 *   sorted by `start`; assumed not to overlap (as produced from Scripts.txt by
 *   `parseScriptRanges`).
 * @param {number} codePoint - Code point to look up.
 * @returns {string} The script name, or `'Unknown'` when no range contains
 *   the code point.
 */
function findScript(ranges, codePoint) {
  let low = 0;
  let high = ranges.length - 1;
  while (low <= high) {
    const mid = (low + high) >>> 1;
    const range = ranges[mid];
    if (codePoint < range.start) {
      high = mid - 1;
    } else if (codePoint > range.end) {
      low = mid + 1;
    } else {
      return range.script;
    }
  }
  return 'Unknown';
}

/**
 * Converts a UnicodeData.txt numeric-value field to a number.
 *
 * @param {string} field - Either a plain number (`"5"`) or a fraction
 *   (`"1/2"`, `"-1/2"`).
 * @returns {number} The numeric value; fractions are divided out.
 */
function parseNumericValue(field) {
  const [numerator, denominator] = field.split('/').map(Number);
  return denominator === undefined ? numerator : numerator / denominator;
}

/**
 * Builds the table of characters that carry numeric information.
 *
 * A character is kept when its general category (field 2) is `Nd`, or when
 * its decimal-value field (6) or numeric-value field (8) is non-empty.
 *
 * @param {string} unicodeDataTxt - Raw text of UnicodeData.txt.
 * @param {{start: number, end: number, script: string}[]} scriptRanges -
 *   Sorted ranges from `parseScriptRanges`.
 * @returns {Map<string, {script: string, decimalDigit: boolean,
 *   decimalValue?: number, numeric?: number}>} Entries keyed by character, in
 *   file order.
 */
function buildNumericTable(unicodeDataTxt, scriptRanges) {
  const table = new Map();
  for (const line of unicodeDataTxt.split('\n')) {
    if (!line.trim()) continue;
    const fields = line.split(';');
    // Truncated lines have no numeric field to inspect.
    if (fields.length < 9) continue;
    const codePoint = Number.parseInt(fields[0], 16);
    if (Number.isNaN(codePoint) || codePoint > 0x10ffff) continue;

    const isDecimalDigit = fields[2] === 'Nd';
    const decimal = fields[6];
    const numeric = fields[8];
    // Decide first, so the script lookup only runs for characters we keep.
    if (!isDecimalDigit && decimal === '' && numeric === '') continue;

    const entry = {
      script: findScript(scriptRanges, codePoint),
      decimalDigit: isDecimalDigit,
    };
    if (decimal !== '') entry.decimalValue = Number(decimal);
    if (numeric !== '') entry.numeric = parseNumericValue(numeric);
    table.set(String.fromCodePoint(codePoint), entry);
  }
  return table;
}

/**
 * Wraps text in single quotes, escaping backslashes and single quotes so the
 * result is always a valid JavaScript string literal.
 *
 * @param {string} text - Text to quote.
 * @returns {string} A single-quoted JavaScript string literal.
 */
function quoteSingle(text) {
  return `'${text.replace(/[\\']/g, '\\$&')}'`;
}

/**
 * Renders the table as copy-paste-ready JavaScript source lines.
 *
 * @param {Map<string, object>} table - Table from `buildNumericTable`.
 * @returns {string[]} Source lines declaring `let unicodeData = { ... };`.
 */
function formatTable(table) {
  const lines = ['let unicodeData = {'];
  for (const [char, entry] of table) {
    const props = Object.entries(entry)
      .map(([key, value]) => `${key}: ${typeof value === 'string' ? quoteSingle(value) : value}`)
      .join(', ');
    lines.push(` ${quoteSingle(char)}: { ${props} },`);
  }
  lines.push('};');
  return lines;
}

// Fetch the Unicode Character Database files, then print the generated table.
(async () => {
  const ucdBaseUrl = 'https://www.unicode.org/Public/UCD/latest/ucd/';
  const [unicodeDataTxt, scriptsTxt] = await Promise.all([
    fetchText(`${ucdBaseUrl}UnicodeData.txt`),
    fetchText(`${ucdBaseUrl}Scripts.txt`),
  ]);
  const table = buildNumericTable(unicodeDataTxt, parseScriptRanges(scriptsTxt));
  // Output copy-paste-ready JS
  for (const line of formatTable(table)) {
    console.log(line);
  }
})().catch((err) => {
  // Report download/parse failures instead of leaving an unhandled rejection.
  console.error('Failed to generate the numeric Unicode table:', err);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment