Last active
February 24, 2023 06:55
-
-
Save allfake/44efe955480bc3f2dc08cc7b15f841cd to your computer and use it in GitHub Desktop.
Thai sorting -> port from Java to TypeScript
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Thai string sorting, ported from Java to TypeScript. Original: https://thai-notes.com/notes/sortingthai.html
// UTF-16 code units of the Thai characters that delimit the ranges used below.

/** U+0E40 THAI CHARACTER SARA E: first code point of the leading-vowel range. */
const SARA_E = 0x0e40;
/** U+0E44 THAI CHARACTER SARA AI MAIMALAI: last code point of the leading-vowel range. */
const SARA_AI_MAIMALAI = 0x0e44;
/** U+0E47 THAI CHARACTER MAITAIKHU: first code point of the "above" mark range. */
const MAITAIKHU = 0x0e47;
/** U+0E4C THAI CHARACTER THANTHAKHAT (a.k.a. "garan"): last code point of the "above" mark range. */
const THANTHAKHAT = 0x0e4c;
/**
 * Tests whether a character is a Thai leading vowel.
 *
 * Only the first UTF-16 code unit of `c` is inspected.
 *
 * @param c - String whose first character is examined.
 * @returns `true` when that code unit lies in the inclusive range
 *   SARA_E..SARA_AI_MAIMALAI; `false` otherwise (including for "").
 */
export function isLeadingVowel(c: string) {
  const code = c.charCodeAt(0);
  return SARA_E <= code && code <= SARA_AI_MAIMALAI;
}
/**
 * Tests whether a character is one of the Thai "above" symbols.
 *
 * The accepted range runs from MAITAIKHU to THANTHAKHAT inclusive, which
 * covers the four tone marks. Only the first UTF-16 code unit of `c` is
 * inspected.
 *
 * @param c - String whose first character is examined.
 * @returns `true` when that code unit lies in the inclusive range
 *   MAITAIKHU..THANTHAKHAT; `false` otherwise (including for "").
 */
export function isToneMark(c: string) {
  const code = c.charCodeAt(0);
  return MAITAIKHU <= code && code <= THANTHAKHAT;
}
/**
 * Returns a copy of `str` in which the character at `index` is replaced by
 * `chr`. Strings are immutable, so the input is never modified; callers must
 * use the returned value.
 *
 * @param str - Source string.
 * @param index - Position of the character to replace.
 * @param chr - Replacement text inserted at `index`.
 * @returns The new string, or `str` unchanged when `index` is past the last
 *   character.
 */
function setCharAt(str: string, index: number, chr: string) {
  const lastIndex = str.length - 1;
  if (index > lastIndex) {
    return str;
  }
  const before = str.substring(0, index);
  const after = str.substring(index + 1);
  return before + chr + after;
}
/**
 * Builds the key used to compare Thai strings.
 *
 * Two transformations are applied to `s`:
 *
 * 1. Every leading vowel (see `isLeadingVowel`) is swapped with the character
 *    that follows it, so the key starts with the following character rather
 *    than the vowel.
 * 2. Every mark matched by `isToneMark` is removed from the main text (the
 *    "head") and appended to a "tail". The tail always begins with "00"; each
 *    mark is preceded by its position counted from the END of the string,
 *    written with at least two digits.
 *
 * @param s - Text to convert.
 * @returns The head followed by the tail.
 */
export function getThaiComparisonString(s: string): string {
  let chars = s;

  // Swap each leading vowel with the next character. The loop stops one short
  // of the end because a vowel in the final position has nothing to swap with.
  for (let i = 0; i < chars.length - 1; i++) {
    const current = chars.charAt(i);
    if (isLeadingVowel(current)) {
      // Strings are immutable: setCharAt returns a new string that must be
      // kept, otherwise the swap is silently lost.
      chars = setCharAt(chars, i, chars.charAt(i + 1));
      chars = setCharAt(chars, i + 1, current);
      // Step over the vowel that now sits at i + 1 so it is not swapped again.
      i++;
    }
  }

  // The comparison string is built in two parts, "head" and "tail".
  let head = "";
  let tail = "00";

  for (let i = 0; i < chars.length; i++) {
    const current = chars.charAt(i);
    if (isToneMark(current)) {
      // Position measured from the end of the string, zero-padded to two digits.
      const pos = chars.length - i;
      tail += (pos < 10 ? "0" : "") + pos;
      tail += current;
    } else {
      head += current;
    }
  }

  return head + tail;
}
/**
 * Example comparator: orders two Thai strings by their comparison keys.
 *
 * Each argument is converted with `getThaiComparisonString`, then the keys
 * are compared with `localeCompare` using the "th" locale.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The `localeCompare` result for the two keys: negative when `a`
 *   sorts before `b`, positive when after, 0 when equal.
 */
export function HowtoUse(a: string, b: string) {
  const keyA = getThaiComparisonString(a);
  return keyA.localeCompare(getThaiComparisonString(b), "th");
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment