-
-
Save thaont-0210/96bfa4629ae9259ab1f1998b03e24bd0 to your computer and use it in GitHub Desktop.
Converting standard Vietnamese Characters to non-accent ones (Chuyển đổi ký tự tiếng Việt sang không dấu). Example: hải -> hai
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This function converts the string to lowercase, then perform the conversion | |
function toLowerCaseNonAccentVietnamese(str) { | |
str = str.toLowerCase(); | |
// We can also use this instead of from line 11 to line 17 | |
// str = str.replace(/\u00E0|\u00E1|\u1EA1|\u1EA3|\u00E3|\u00E2|\u1EA7|\u1EA5|\u1EAD|\u1EA9|\u1EAB|\u0103|\u1EB1|\u1EAF|\u1EB7|\u1EB3|\u1EB5/g, "a"); | |
// str = str.replace(/\u00E8|\u00E9|\u1EB9|\u1EBB|\u1EBD|\u00EA|\u1EC1|\u1EBF|\u1EC7|\u1EC3|\u1EC5/g, "e"); | |
// str = str.replace(/\u00EC|\u00ED|\u1ECB|\u1EC9|\u0129/g, "i"); | |
// str = str.replace(/\u00F2|\u00F3|\u1ECD|\u1ECF|\u00F5|\u00F4|\u1ED3|\u1ED1|\u1ED9|\u1ED5|\u1ED7|\u01A1|\u1EDD|\u1EDB|\u1EE3|\u1EDF|\u1EE1/g, "o"); | |
// str = str.replace(/\u00F9|\u00FA|\u1EE5|\u1EE7|\u0169|\u01B0|\u1EEB|\u1EE9|\u1EF1|\u1EED|\u1EEF/g, "u"); | |
// str = str.replace(/\u1EF3|\u00FD|\u1EF5|\u1EF7|\u1EF9/g, "y"); | |
// str = str.replace(/\u0111/g, "d"); | |
str = str.replace(/à|á|ạ|ả|ã|â|ầ|ấ|ậ|ẩ|ẫ|ă|ằ|ắ|ặ|ẳ|ẵ/g, "a"); | |
str = str.replace(/è|é|ẹ|ẻ|ẽ|ê|ề|ế|ệ|ể|ễ/g, "e"); | |
str = str.replace(/ì|í|ị|ỉ|ĩ/g, "i"); | |
str = str.replace(/ò|ó|ọ|ỏ|õ|ô|ồ|ố|ộ|ổ|ỗ|ơ|ờ|ớ|ợ|ở|ỡ/g, "o"); | |
str = str.replace(/ù|ú|ụ|ủ|ũ|ư|ừ|ứ|ự|ử|ữ/g, "u"); | |
str = str.replace(/ỳ|ý|ỵ|ỷ|ỹ/g, "y"); | |
str = str.replace(/đ/g, "d"); | |
// Some system encode vietnamese combining accent as individual utf-8 characters | |
str = str.replace(/\u0300|\u0301|\u0303|\u0309|\u0323/g, ""); // Huyền sắc hỏi ngã nặng | |
str = str.replace(/\u02C6|\u0306|\u031B/g, ""); // Â, Ê, Ă, Ơ, Ư | |
return str; | |
} | |
// This function keeps the casing unchanged for str, then perform the conversion | |
function toNonAccentVietnamese(str) { | |
str = str.replace(/A|Á|À|Ã|Ạ|Â|Ấ|Ầ|Ẫ|Ậ|Ă|Ắ|Ằ|Ẵ|Ặ/g, "A"); | |
str = str.replace(/à|á|ạ|ả|ã|â|ầ|ấ|ậ|ẩ|ẫ|ă|ằ|ắ|ặ|ẳ|ẵ/g, "a"); | |
str = str.replace(/E|É|È|Ẽ|Ẹ|Ê|Ế|Ề|Ễ|Ệ/, "E"); | |
str = str.replace(/è|é|ẹ|ẻ|ẽ|ê|ề|ế|ệ|ể|ễ/g, "e"); | |
str = str.replace(/I|Í|Ì|Ĩ|Ị/g, "I"); | |
str = str.replace(/ì|í|ị|ỉ|ĩ/g, "i"); | |
str = str.replace(/O|Ó|Ò|Õ|Ọ|Ô|Ố|Ồ|Ỗ|Ộ|Ơ|Ớ|Ờ|Ỡ|Ợ/g, "O"); | |
str = str.replace(/ò|ó|ọ|ỏ|õ|ô|ồ|ố|ộ|ổ|ỗ|ơ|ờ|ớ|ợ|ở|ỡ/g, "o"); | |
str = str.replace(/U|Ú|Ù|Ũ|Ụ|Ư|Ứ|Ừ|Ữ|Ự/g, "U"); | |
str = str.replace(/ù|ú|ụ|ủ|ũ|ư|ừ|ứ|ự|ử|ữ/g, "u"); | |
str = str.replace(/Y|Ý|Ỳ|Ỹ|Ỵ/g, "Y"); | |
str = str.replace(/ỳ|ý|ỵ|ỷ|ỹ/g, "y"); | |
str = str.replace(/Đ/g, "D"); | |
str = str.replace(/đ/g, "d"); | |
// Some system encode vietnamese combining accent as individual utf-8 characters | |
str = str.replace(/\u0300|\u0301|\u0303|\u0309|\u0323/g, ""); // Huyền sắc hỏi ngã nặng | |
str = str.replace(/\u02C6|\u0306|\u031B/g, ""); // Â, Ê, Ă, Ơ, Ư | |
return str; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment