Skip to content

Instantly share code, notes, and snippets.

@ArtemAvramenko
Last active July 24, 2024 19:01
Show Gist options
  • Save ArtemAvramenko/691ed8610d4ddabdca7de96c8b173c56 to your computer and use it in GitHub Desktop.
Save ArtemAvramenko/691ed8610d4ddabdca7de96c8b173c56 to your computer and use it in GitHub Desktop.
Converts strings in national alphabets to Basic Latin identifiers
const toBasicLatin = (function() {
  // see https://gist.github.com/ArtemAvramenko/ec3b5358221f8b6e9f3e9efe1d0a3066
  //
  // Packed transliteration table: every run of ASCII capitals is a Latin
  // replacement, and the non-capital characters that follow it (up to the next
  // capital) are the source characters that map to that replacement.
  // The fi/fl ligatures are spelled as \uFB01/\uFB02 escapes so the entries stay
  // intact even if tooling compatibility-normalizes the source text; as plain
  // ASCII letters they would be dead entries, because getLat() returns a-z as is.
  const data =
    'AαаAEæBβбCHчDđðδдDJђDZѕDZHџEεηеэFƒфFI\uFB01FL\uFB02GγгґGJѓIıιиіIAяIEєIOёIUюJјKκкKHχхKJќLłλл' +
    'LJљMμмNνнNJњOøοωоOEœPπпPHφPSψRρрSσςсSHшSHCHщSSßTτтTHþθTSцTSHћUµυуVвXξYыYIїZζзZHж';

  // source character -> lowercase Latin replacement
  const map = new Map();
  for (const [, latin, sources] of data.matchAll(/([A-Z]+)([^A-Z]+)/g)) {
    const latValue = latin.toLowerCase();
    for (const ch of sources) {
      map.set(ch, latValue);
    }
  }

  const basicLatin = /[a-z0-9]/;
  const whitespace = /\s/;

  // Lowercase ASCII letters and digits pass through; everything else goes via the table.
  const getLat = ch => (basicLatin.test(ch) ? ch : map.get(ch));
  const capitalize = word => word[0].toUpperCase() + word.substring(1);

  /**
   * Converts a string in a national alphabet to a Basic Latin identifier.
   *
   * The input is lowercased and trimmed; whitespace separates words; characters
   * that have no Latin replacement are dropped.
   *
   * @param {string} s - Text to convert. Falsy values are returned unchanged.
   * @param {string} [mode] - 'lower' | 'upper' | 'camel' | 'pascal'.
   *   Any other value behaves like 'lower'.
   * @param {string} [splitSym=''] - Separator inserted between words.
   * @returns {string} The transliterated identifier.
   */
  return (s, mode, splitSym = '') => {
    if (!s) {
      return s;
    }
    const isPascal = mode === 'pascal';
    const titleWords = isPascal || mode === 'camel';
    const uppercase = mode === 'upper';

    let res = '';
    // In pascal mode the very first word is capitalized as well.
    let isNextWord = isPascal;
    for (const ch of s.toLowerCase().trim()) {
      if (whitespace.test(ch)) {
        isNextWord = true;
        continue;
      }
      // Fallback: the first UTF-16 unit of the NFKD form, which is the base
      // letter for characters that decompose into letter + combining marks.
      let lat = getLat(ch) || getLat(ch.normalize('NFKD')[0]);
      if (!lat) {
        continue;
      }
      if (isNextWord) {
        if (res) {
          // A later word: capitalize for camel/pascal and insert the separator.
          if (titleWords) {
            lat = capitalize(lat);
          }
          res += splitSym;
        } else if (isPascal) {
          // First emitted word: only pascal capitalizes it. Without this
          // distinction, leading unmappable text followed by whitespace
          // ('# hello') made camel mode return 'Hello'.
          lat = capitalize(lat);
        }
        isNextWord = false;
      }
      res += uppercase ? lat.toUpperCase() : lat;
    }
    return res;
  };
})();
const toBasicLatin2 = (function() {
  // see https://gist.github.com/ArtemAvramenko/ec3b5358221f8b6e9f3e9efe1d0a3066
  //
  // Packed transliteration table: every run of ASCII capitals is a Latin
  // replacement, and the non-capital characters that follow it (up to the next
  // capital) are the source characters that map to that replacement.
  // The fi/fl ligatures are spelled as \uFB01/\uFB02 escapes so the entries stay
  // intact even if tooling compatibility-normalizes the source text; as plain
  // ASCII letters they would be dead entries, because getLat() returns a-z as is.
  const data =
    'AαаAEæBβбCHчDđðδдDJђDZѕDZHџEεηеэFƒфFI\uFB01FL\uFB02GγгґGJѓIıιиіIAяIEєIOёIUюJјKκкKHχхKJќLłλл' +
    'LJљMμмNνнNJњOøοωоOEœPπпPHφPSψRρрSσςсSHшSHCHщSSßTτтTHþθTSцTSHћUµυуVвXξYыYIїZζзZHж';

  // source character -> lowercase Latin replacement
  const map = new Map();
  for (const [, latin, sources] of data.matchAll(/([A-Z]+)([^A-Z]+)/g)) {
    const latValue = latin.toLowerCase();
    for (const ch of sources) {
      map.set(ch, latValue);
    }
  }

  const basicLatin = /[a-z0-9]/;

  // Lowercase ASCII letters and digits pass through; everything else goes via the table.
  const getLat = ch => (basicLatin.test(ch) ? ch : map.get(ch));

  /**
   * Minimal variant: lowercases the input and transliterates it character by
   * character. Everything without a Latin replacement (whitespace included) is
   * dropped, because join('') renders the undefined results as empty strings.
   *
   * @param {string} s - Text to convert. Falsy values are returned unchanged.
   * @returns {string} Lowercase Basic Latin letters and digits only.
   */
  return s => {
    if (!s) {
      return s;
    }
    // Array.from iterates by code point, so characters outside the BMP reach
    // normalize() whole instead of being split into two lone surrogates.
    return Array.from(
      s.toLowerCase(),
      ch => getLat(ch) || getLat(ch.normalize('NFKD')[0])
    ).join('');
  };
})();
// // a fallback in case the string does not contain a single known letter:
// // example: !"#$ -> role0xyz10 ('role0' prefix followed by each char code in base 36)
// const fallbackName = 'role0' + Array.prototype.map.call(s, c => c.charCodeAt(0).toString(36)).join('');
/**
 * Tiny assertion helper for the self-checks in this script.
 *
 * Returns an object whose toBe(expected) compares with strict inequality and,
 * on a mismatch, reports it through console.error. It never throws, so the
 * checks that follow a failure still run.
 */
const expect = (actual) => {
  const toBe = (expected) => {
    if (actual === expected) {
      return;
    }
    // Plain `+` concatenation is kept on purpose: it is what defines the
    // exact text of the message for non-string values.
    console.error(actual + ' should be ' + expected);
  };
  return { toBe };
};
// Self-checks for toBasicLatin. Each entry is
// [arguments passed to toBasicLatin, expected result]; entries run in order.
const basicLatinCases = [
  [['Großer Chef', 'camel'], 'grosserChef'],
  [['Ґазда', 'camel'], 'gazda'],
  [['Πρόεδρος', 'camel'], 'proedros'],
  [['Başkanı', 'camel'], 'baskani'],
  [['Hans Høyhet', 'camel'], 'hansHoyhet'],
  [['роль 123', 'camel'], 'rol123'],
  [['Оператор щита', 'camel'], 'operatorShchita'],
  [[' Executive director ', 'lower', ' '], 'executive director'],
  [['Executive director', 'lower'], 'executivedirector'],
  [['Executive director', 'camel'], 'executiveDirector'],
  [['Executive director', 'pascal'], 'ExecutiveDirector'],
  [['Executive director', 'lower', '-'], 'executive-director'],
  [['Executive director', 'camel', '-'], 'executive-Director'],
  [['Executive director', 'pascal', '-'], 'Executive-Director'],
  [['Großer Chef', 'lower', '_'], 'grosser_chef'],
  [['Großer Chef', 'upper', '_'], 'GROSSER_CHEF'],
  [['Großer Chef', 'upper'], 'GROSSERCHEF'],
  [['flying phœnix', 'lower'], 'flyingphoenix'],
];
for (const [args, expected] of basicLatinCases) {
  expect(toBasicLatin(...args)).toBe(expected);
}
// Printed once every case has been checked.
console.log('tests completed');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment