Last active
February 17, 2020 13:59
-
-
Save TheoryOfNekomata/a366cb4b31932185eee1b8d8429a9fb4 to your computer and use it in GitHub Desktop.
Tagalog Baybayin converter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Per-script transliteration tables, keyed by script name.
//   start                   - first code point of the script's block
//   consonantMappingOffsets - offset (added to `start`) of the letter used for
//                             each Latin consonant; `null` marks a consonant
//                             the script has no letter for
const scripts = (() => {
  // Consonant offsets common to all four scripts; individual scripts only
  // list the entries where they differ.
  const sharedConsonantOffsets = {
    b: 0x0a,
    k: 0x03,
    d: 0x07,
    g: 0x04,
    h: 0x11,
    l: 0x0e,
    m: 0x0b,
    n: 0x08,
    ng: 0x05,
    p: 0x09,
    r: 0x0d,
    s: 0x10,
    t: 0x06,
    w: 0x0f,
    y: 0x0c,
  }

  // Overriding an existing key keeps its position, so key order is unchanged.
  const defineScript = (start, overrides = {}) => ({
    start,
    consonantMappingOffsets: { ...sharedConsonantOffsets, ...overrides },
  })

  return {
    // `r` shares the offset of `d` here.
    tagalog: defineScript(0x1700, { r: 0x07 }),
    hanunoo: defineScript(0x1720),
    buhid: defineScript(0x1740),
    // No letter for `h`; `r` shares the offset of `d`.
    tagbanwa: defineScript(0x1760, { h: null, r: 0x07 }),
  }
})()
// Offsets (added to a script's `start`) of the standalone vowel letters, used
// when a vowel is not attached to a preceding consonant. Only three letters
// exist, so `e`/`i` and `o`/`u` each share one.
const initialVowelOffsets = (() => {
  const letterA = 0x00
  const letterI = 0x01
  const letterU = 0x02
  return {
    a: letterA,
    e: letterI,
    i: letterI,
    o: letterU,
    u: letterU,
  }
})()
// Offsets (added to a script's `start`) of the vowel signs appended to a
// consonant letter. `a` is `null` because no sign is appended for it;
// `e`/`i` and `o`/`u` each share one sign.
const vowels = (() => {
  const signI = 0x12
  const signU = 0x13
  return {
    a: null,
    e: signI,
    i: signI,
    o: signU,
    u: signU,
  }
})()
/**
 * Transliterates Latin-script text into one of the scripts defined in
 * `scripts`.
 *
 * The input is lower-cased, stripped of special characters, has its
 * punctuation mapped to the script punctuation marks, has borrowed Spanish
 * spellings rewritten phonetically, and is then converted syllable by
 * syllable: each consonant becomes its letter followed by a vowel sign
 * (nothing for `a`, or the code point at offset 0x14 when no vowel follows),
 * and every remaining vowel becomes its standalone letter.
 *
 * @param {string} s - Text to convert.
 * @param {object} [options]
 * @param {string} [options.script='tagalog'] - Key of `scripts` selecting the
 *   target script.
 * @returns {string} The converted text.
 * @throws {TypeError} If `script` is not an own key of `scripts`.
 */
export default (s, { script = 'tagalog', } = {}) => {
  // Own-property check so inherited names such as 'constructor' are rejected
  // up front rather than failing later with an unrelated error.
  if (!Object.prototype.hasOwnProperty.call(scripts, script)) {
    throw new TypeError(`Unknown script: ${String(script)}`)
  }
  const { start, consonantMappingOffsets, } = scripts[script]
  const fromOffset = offset => String.fromCodePoint(start + offset)
  return (
    s
      .toLowerCase()
      // special characters
      .replace(/[\]_^[}{@#$%&*()<>+=|"'\/-]/g, '')
      // sentence ending characters
      .replace(/[.!?]\s*/g, '᜶')
      // sentence subdivision characters (breath marks)
      .replace(/[,:;]\s*/g, '᜵')
      // remove double spacing
      .replace(/[ ][ ]+/g, ' ')
      // borrowed from Spanish, use pronunciation
      .replace(/c([aou])/g, (_, vowel) => `k${vowel}`)
      .replace(/c([ei])/g, (_, vowel) => `s${vowel}`)
      .replace(/f/g, 'p')
      .replace(/ñ/g, 'ny')
      .replace(/j/g, 'h')
      // global, like the sibling rules, so every occurrence is rewritten
      .replace(/qu([ei])/g, (_, vowel) => `k${vowel}`)
      .replace(/qu([aou])/g, (_, vowel) => `kuw${vowel}`)
      .replace(/v/g, 'b')
      .replace(/x/g, 'ks')
      .replace(/z/g, 's')
      // replace these special words
      .replace(/\bng\b/g, 'nang')
      .replace(/\bmga\b/g, 'manga')
      // perform main conversion
      .replace(/(ng|g|b|k|[rd]|h|l|m|n|p|s|t|w|y)(a|[ei]|[ou])?/g, (_, consonant, vowel) => {
        const consonantOffset = consonantMappingOffsets[consonant]
        if (consonantOffset === null) {
          // The script has no letter for this consonant: keep only the vowel
          // as a standalone letter, or drop the consonant when no vowel
          // follows (there is nothing to emit).
          return vowel === undefined ? '' : fromOffset(initialVowelOffsets[vowel])
        }
        // No vowel captured -> offset 0x14; `a` -> null, i.e. no sign at all.
        // NOTE(review): offset 0x14 is assumed to be a vowel-killer mark in
        // every script's block; confirm that code point is assigned for
        // Buhid and Tagbanwa.
        const { [vowel]: vowelSignOffset = 0x14 } = vowels
        const theConsonant = fromOffset(consonantOffset)
        const theVowel = vowelSignOffset === null ? '' : fromOffset(vowelSignOffset)
        return `${theConsonant}${theVowel}`
      })
      // vowels not consumed by a consonant become standalone vowel letters
      .replace(/(a|[ei]|[ou])/g, (_, vowel) => fromOffset(initialVowelOffsets[vowel]))
  )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment