Created
May 11, 2023 11:36
-
-
Save narze/2bcea78283689bb4b63eaffa57b95cd4 to your computer and use it in GitHub Desktop.
Split Thai text into graphemes, with adjustments for vertical display
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Split a Thai string into display units: one base character followed by any
 * combining marks that attach to it.
 *
 * Before splitting, two characters are expanded so their parts land in
 * separate units:
 *   - SARA AM (U+0E33) becomes NIKHAHIT (U+0E4D) + SARA AA (U+0E32); the
 *     nikhahit then attaches to the preceding unit and SARA AA stands alone.
 *   - SARA AE (U+0E41) becomes two SARA E (U+0E40) characters.
 *
 * Code points outside the Thai block (U+0E00-U+0E7F) are not matched by the
 * splitter and therefore do not appear in the result.
 *
 * @param {string} str - Text to split.
 * @returns {string[]} The matched units in order, or an empty array when the
 *   input contains no Thai characters.
 */
function splitthai(str) {
  // Adapted from https://github.com/nota/split-graphemes/blob/master/src/thai.js
  // Any code point in the Thai block may start a unit.
  const letter = '[\\u0E00-\\u0E7F]';
  // Marks that stay attached to the preceding character:
  // U+0E31, U+0E33-U+0E3A and U+0E47-U+0E4E.
  const trailingLetter = '[\\u0E31\\u0E33-\\u0E3A\\u0E47-\\u0E4E]';
  const splitter = new RegExp(`${letter}${trailingLetter}*`, 'gu');

  // Escapes are used instead of literal Thai characters so the exact code
  // points are unambiguous in any editor.
  const expanded = str
    .replace(/\u0E33/g, '\u0E4D\u0E32') // SARA AM -> NIKHAHIT + SARA AA
    .replace(/\u0E41/g, '\u0E40\u0E40'); // SARA AE -> SARA E + SARA E

  // String.prototype.match returns null when nothing matches; normalise to [].
  return expanded.match(splitter) || [];
}
// Usage examples; the trailing comment on each line is the expected output.
console.log(splitthai('อิอิกำ')); // [ 'อิ', 'อิ', 'กํ', 'า' ]
console.log(splitthai('ไม้ทัณฑฆาต')); // [ 'ไ', 'ม้', 'ทั', 'ณ', 'ฑ', 'ฆ', 'า', 'ต' ]
console.log(splitthai('ข้าวมันไก่')); // [ 'ข้', 'า', 'ว', 'มั', 'น', 'ไ', 'ก่' ]
console.log(splitthai('ลำแสง')); // [ 'ลํ', 'า', 'เ', 'เ', 'ส', 'ง' ]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment