Last active
November 1, 2017 21:02
-
-
Save ajmas/47bec150208624f9c8bce21334effb94 to your computer and use it in GitHub Desktop.
Remove accents and symbols not compatible with Latin base alphabet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
This works by converting text to decomposed Unicode form (NFD), such that
accents become separate combining characters following their base letter.
Every character outside the kept ranges is then stripped, which drops the
combining marks and leaves the base letters behind.
There are certain characters that won't work with this, such as 'ø', since
it is not an 'o' with a slash accent: it has no decomposition and is
removed entirely.
*/
/**
 * Returns `text` with accents removed and every remaining character that is
 * outside the kept ranges dropped.
 *
 * Kept ranges: U+0009-U+0014 and U+0020-U+007E (printable ASCII).
 * NOTE(review): the upper bound U+0014 looks like a decimal/hex mix-up
 * (U+000D, carriage return, may have been intended) - confirm before changing.
 *
 * @param {string} text - The text to simplify.
 * @returns {string} The simplified text; an empty string when no character
 *   of the input survives (including when the input is empty).
 */
function asciiFriendlyText (text) {
  // Removing the complement of the kept set is equivalent to matching the
  // kept runs and joining them, but it also copes with inputs where nothing
  // matches (match() would return null and the join would throw).
  return text.normalize('NFD').replace(/[^\u0009-\u0014\u0020-\u007E]+/g, '');
}
// Demo: print the ASCII-friendly form of each sample string, in order.
for (const sample of [
  // French
  'éléphant',
  'Je suis un élève',
  // Vietnamese
  'ruộng',
  // Unsupported, since they would require different logic:
  'Æ, Ø, ß',
  'Đà Nẵng, Quảng Nam, Quảng Ngãi, Bình Định, Phú Yên, Nha Trang',
]) {
  console.log(asciiFriendlyText(sample));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment