Skip to content

Instantly share code, notes, and snippets.

@vicapow
Created April 11, 2025 22:06
Show Gist options
  • Save vicapow/bd54806582b287e9e9e9028b651fd6a5 to your computer and use it in GitHub Desktop.
Save vicapow/bd54806582b287e9e9e9028b651fd6a5 to your computer and use it in GitHub Desktop.
Generate Unicode forms
/**
 * Code generator: downloads UnicodeData.txt, finds every contiguous run of ten
 * decimal-digit code points (general category `Nd`, values 0 through 9) and
 * prints the source of a `getForm(codePoint)` function that maps a code point
 * to a label derived from the name of the run's ZERO digit.
 *
 * The generated source is written to stdout; failures are reported on stderr.
 */
(async () => {
  const UNICODE_DATA_URL = 'https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt';
  const DIGITS_PER_RANGE = 10;

  /**
   * Extracts the decimal-digit entries from the text of UnicodeData.txt.
   *
   * @param {string} text - Raw file contents (semicolon-separated fields, one record per line).
   * @returns {{code: number, decimal: number, name: string}[]} Entries in file order.
   */
  const parseDecimalDigits = (text) => {
    const entries = [];
    // Accept both LF and CRLF so a trailing '\r' never leaks into the last field.
    for (const line of text.split(/\r?\n/)) {
      if (!line.trim()) continue;
      const fields = line.split(';');
      // Field 0: code point (hex), 1: name, 2: general category, 6: decimal digit value.
      const category = fields[2];
      const decimal = fields[6];
      if (category === 'Nd' && decimal !== undefined && decimal !== '') {
        entries.push({
          code: Number.parseInt(fields[0], 16),
          decimal: Number(decimal),
          name: fields[1],
        });
      }
    }
    return entries;
  };

  /**
   * Groups digit entries into runs of ten consecutive code points whose
   * decimal values are exactly 0, 1, ..., 9 in that order.
   *
   * @param {{code: number, decimal: number, name: string}[]} entries - Output of parseDecimalDigits.
   * @returns {{code: number, decimal: number, name: string}[][]} Complete ten-digit runs.
   */
  const groupDigitRanges = (entries) => {
    const ranges = [];
    let run = [];
    entries.forEach((entry, index) => {
      run.push(entry);
      if (run.length === DIGITS_PER_RANGE) {
        // Require in-order values: the label and the range start both assume
        // that the first element of the run is the ZERO digit.
        if (run.every((item, position) => item.decimal === position)) {
          ranges.push(run);
        }
        run = [];
        return;
      }
      const next = entries[index + 1];
      // A gap in code points means the partial run can never be completed.
      if (next !== undefined && next.code !== entry.code + 1) {
        run = [];
      }
    });
    return ranges;
  };

  /**
   * Turns a character name such as "ARABIC-INDIC DIGIT ZERO" into a
   * lower-case identifier such as "arabic_indic_digit_zero".
   *
   * @param {string} name - Character name from UnicodeData.txt.
   * @returns {string} Identifier containing only a-z and single underscores.
   */
  const toLabel = (name) =>
    name
      .toLowerCase()
      .replace(/[^a-z]/g, '_')
      .replace(/_+/g, '_')
      .replace(/^_|_$/g, '');

  /**
   * Formats a code point the way it appears in the generated source.
   *
   * @param {number} code - Code point.
   * @returns {string} Upper-case hexadecimal literal, e.g. "0x660".
   */
  const toHexLiteral = (code) => `0x${code.toString(16).toUpperCase()}`;

  /**
   * Renders the source text of the generated getForm() function.
   *
   * @param {{code: number, decimal: number, name: string}[][]} ranges - Output of groupDigitRanges.
   * @returns {string} Function source, one range check per line.
   */
  const renderGetForm = (ranges) => {
    const checks = ranges.map((range) => {
      const start = toHexLiteral(range[0].code);
      const end = toHexLiteral(range[DIGITS_PER_RANGE - 1].code);
      return ` if (codePoint >= ${start} && codePoint <= ${end}) return '${toLabel(range[0].name)}';`;
    });
    return ['function getForm(codePoint) {', ...checks, " return 'other';", '}'].join('\n');
  };

  const res = await fetch(UNICODE_DATA_URL);
  if (!res.ok) {
    // Without this check an HTTP error page would be parsed as data and the
    // script would silently emit a getForm() with no ranges.
    throw new Error(`Failed to fetch ${UNICODE_DATA_URL}: ${res.status} ${res.statusText}`);
  }
  const text = await res.text();

  console.log(renderGetForm(groupDigitRanges(parseDecimalDigits(text))));
})().catch((err) => {
  // Do not leave the promise floating: report the failure explicitly.
  console.error('Failed to generate getForm():', err);
  // `process` only exists under Node; the script may also be pasted into a browser console.
  if (typeof process !== 'undefined') {
    process.exitCode = 1;
  }
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment