Last active
July 24, 2024 19:01
-
-
Save ArtemAvramenko/691ed8610d4ddabdca7de96c8b173c56 to your computer and use it in GitHub Desktop.
Converts strings in national alphabets to Basic Latin identifiers
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Converts a string written in a national alphabet (Latin with diacritics,
 * Greek, Cyrillic, ...) into an identifier made of Basic Latin letters and digits.
 *
 * The input is lower-cased, runs of whitespace separate words, and every
 * character that has no known transliteration is dropped.
 *
 * @param {string} s - Text to convert. Falsy values are returned unchanged.
 * @param {string} [mode] - 'lower' | 'upper' | 'camel' | 'pascal'. Any other
 *   value behaves like 'lower'.
 * @param {string} [splitSym=''] - Separator inserted between words.
 * @returns {string} The transliterated identifier (possibly empty).
 */
const toBasicLatin = (function() {
  // see https://gist.github.com/ArtemAvramenko/ec3b5358221f8b6e9f3e9efe1d0a3066
  // Packed table: each run of uppercase ASCII letters is a transliteration,
  // followed by the characters that are replaced with it (in lower case).
  const data =
    'AαаAEæBβбCHчDđðδдDJђDZѕDZHџEεηеэFƒфFIfiFLflGγгґGJѓIıιиіIAяIEєIOёIUюJјKκкKHχхKJќLłλл' +
    'LJљMμмNνнNJњOøοωоOEœPπпPHφPSψRρрSσςсSHшSHCHщSSßTτтTHþθTSцTSHћUµυуVвXξYыYIїZζзZHж';
  const map = new Map();
  for (const [, latin, sources] of data.matchAll(/([A-Z]+)([^A-Z]+)/g)) {
    const latValue = latin.toLowerCase();
    for (const ch of sources) {
      map.set(ch, latValue);
    }
  }

  // Basic Latin letters and digits pass through; everything else goes via the table.
  const getLat = ch => (/^[a-z0-9]$/.test(ch) ? ch : map.get(ch));

  // Transliterates one code point. When there is no direct mapping, the
  // compatibility decomposition is used: combining marks fall away and
  // multi-letter decompositions (e.g. U+01C6 -> "dz") are kept in full.
  // The decomposition is lower-cased again because caseless symbols may
  // decompose to capital letters.
  const transliterate = ch => {
    const direct = getLat(ch);
    if (direct !== undefined) {
      return direct;
    }
    let out = '';
    for (const part of ch.normalize('NFKD').toLowerCase()) {
      out += getLat(part) ?? '';
    }
    return out;
  };

  return (s, mode, splitSym = '') => { // 'lower' | 'upper' | 'camel' | 'pascal'
    if (!s) {
      return s;
    }
    const titleWords = mode === 'pascal' || mode === 'camel';
    const uppercase = mode === 'upper';
    // In pascal mode the very first word is capitalised as well.
    let isNextWord = mode === 'pascal';
    let res = '';
    for (const ch of s.toLowerCase().trim()) {
      if (/\s/.test(ch)) {
        // Whitespace only starts a new word once something has been emitted;
        // otherwise leading unmapped characters would capitalise the first
        // word in camel mode.
        if (res) {
          isNextWord = true;
        }
        continue;
      }
      let lat = transliterate(ch);
      if (!lat) {
        continue;
      }
      if (isNextWord) {
        if (titleWords) {
          lat = lat[0].toUpperCase() + lat.slice(1);
        }
        if (res) {
          res += splitSym;
        }
        isNextWord = false;
      }
      res += uppercase ? lat.toUpperCase() : lat;
    }
    return res;
  };
})();
/**
 * Minimal variant of the transliterator: lower-cases the input and replaces
 * every character with its Basic Latin transliteration. Characters without a
 * known transliteration (including whitespace) are dropped.
 *
 * @param {string} s - Text to convert. Falsy values are returned unchanged.
 * @returns {string} Lower-case string of Basic Latin letters and digits.
 */
const toBasicLatin2 = (function() {
  // see https://gist.github.com/ArtemAvramenko/ec3b5358221f8b6e9f3e9efe1d0a3066
  // Packed table: each run of uppercase ASCII letters is a transliteration,
  // followed by the characters that are replaced with it (in lower case).
  const data =
    'AαаAEæBβбCHчDđðδдDJђDZѕDZHџEεηеэFƒфFIfiFLflGγгґGJѓIıιиіIAяIEєIOёIUюJјKκкKHχхKJќLłλл' +
    'LJљMμмNνнNJњOøοωоOEœPπпPHφPSψRρрSσςсSHшSHCHщSSßTτтTHþθTSцTSHћUµυуVвXξYыYIїZζзZHж';
  const map = new Map();
  for (const [, latin, sources] of data.matchAll(/([A-Z]+)([^A-Z]+)/g)) {
    const latValue = latin.toLowerCase();
    for (const ch of sources) {
      map.set(ch, latValue);
    }
  }

  // Basic Latin letters and digits pass through; everything else goes via the table.
  const getLat = ch => (/^[a-z0-9]$/.test(ch) ? ch : map.get(ch));

  // Transliterates one code point, falling back to its full (re-lower-cased)
  // compatibility decomposition when there is no direct mapping.
  const transliterate = ch => {
    const direct = getLat(ch);
    if (direct !== undefined) {
      return direct;
    }
    let out = '';
    for (const part of ch.normalize('NFKD').toLowerCase()) {
      out += getLat(part) ?? '';
    }
    return out;
  };

  // Array.from walks the string by code point, so characters outside the BMP
  // are not split into surrogate halves. The wrapper arrow keeps the index
  // argument that Array.from passes away from transliterate.
  return s => (s ? Array.from(s.toLowerCase(), ch => transliterate(ch)).join('') : s);
})();
// A fallback in case the string does not contain a single known letter:
// example: !"#$ -> role0xyz10
// const fallbackName = 'role0' + Array.prototype.map.call(s, c => c.charCodeAt(0).toString(36)).join('');
/**
 * Tiny assertion helper for the self-tests in this file.
 *
 * @param {*} actual - The value produced by the code under test.
 * @returns {{toBe: function(*): void}} Matcher object; `toBe` compares with
 *   strict inequality (`!==`) and reports a mismatch via `console.error`
 *   instead of throwing, so the remaining checks still run.
 */
const expect = actual => ({
  toBe: expected => {
    if (actual !== expected) {
      console.error(actual + ' should be ' + expected);
    }
  }
});
// Self-tests: mismatches are reported through console.error by `expect`.

// Transliteration of individual alphabets (camel mode, single or two words).
expect(toBasicLatin('Großer Chef', 'camel')).toBe('grosserChef');
expect(toBasicLatin('Ґазда', 'camel')).toBe('gazda');
expect(toBasicLatin('Πρόεδρος', 'camel')).toBe('proedros');
expect(toBasicLatin('Başkanı', 'camel')).toBe('baskani');
expect(toBasicLatin('Hans Høyhet', 'camel')).toBe('hansHoyhet');
expect(toBasicLatin('роль 123', 'camel')).toBe('rol123');
expect(toBasicLatin('Оператор щита', 'camel')).toBe('operatorShchita');

// Modes and word separators.
expect(toBasicLatin(' Executive director ', 'lower', ' ')).toBe('executive director');
expect(toBasicLatin('Executive director', 'lower')).toBe('executivedirector');
expect(toBasicLatin('Executive director', 'camel')).toBe('executiveDirector');
expect(toBasicLatin('Executive director', 'pascal')).toBe('ExecutiveDirector');
expect(toBasicLatin('Executive director', 'lower', '-')).toBe('executive-director');
expect(toBasicLatin('Executive director', 'camel', '-')).toBe('executive-Director');
expect(toBasicLatin('Executive director', 'pascal', '-')).toBe('Executive-Director');
expect(toBasicLatin('Großer Chef', 'lower', '_')).toBe('grosser_chef');
expect(toBasicLatin('Großer Chef', 'upper', '_')).toBe('GROSSER_CHEF');
expect(toBasicLatin('Großer Chef', 'upper')).toBe('GROSSERCHEF');
expect(toBasicLatin('flying phœnix', 'lower')).toBe('flyingphoenix');
console.log('tests completed');
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment