Skip to content

Instantly share code, notes, and snippets.

@TheoryOfNekomata
Last active February 17, 2020 13:59
Show Gist options
  • Select an option

  • Save TheoryOfNekomata/a366cb4b31932185eee1b8d8429a9fb4 to your computer and use it in GitHub Desktop.

Select an option

Save TheoryOfNekomata/a366cb4b31932185eee1b8d8429a9fb4 to your computer and use it in GitHub Desktop.
Tagalog Baybayin converter
/**
 * Per-script layout data. Every value is an offset that is added to the
 * script's `start` code point to obtain the character to emit.
 *
 * - `start`: first code point of the script's Unicode block.
 * - `viramaOffset`: offset of the vowel-killer sign written after a consonant
 *   that has no vowel, or `null` when the script has no such sign (Buhid and
 *   Tagbanwa); in that case the vowel-less consonant is left unwritten.
 * - `consonantMappingOffsets`: romanized consonant -> letter offset. `null`
 *   marks a consonant with no letter in the script; it is treated as silent
 *   and only its vowel (if any) is written.
 */
const scripts = {
  tagalog: {
    start: 0x1700,
    viramaOffset: 0x14,
    consonantMappingOffsets: {
      b: 0x0a,
      k: 0x03,
      d: 0x07,
      g: 0x04,
      h: 0x11,
      l: 0x0e,
      m: 0x0b,
      n: 0x08,
      ng: 0x05,
      p: 0x09,
      // r is written with the same letter as d
      r: 0x07,
      s: 0x10,
      t: 0x06,
      w: 0x0f,
      y: 0x0c,
    },
  },
  hanunoo: {
    start: 0x1720,
    viramaOffset: 0x14,
    consonantMappingOffsets: {
      b: 0x0a,
      k: 0x03,
      d: 0x07,
      g: 0x04,
      h: 0x11,
      l: 0x0e,
      m: 0x0b,
      n: 0x08,
      ng: 0x05,
      p: 0x09,
      r: 0x0d,
      s: 0x10,
      t: 0x06,
      w: 0x0f,
      y: 0x0c,
    },
  },
  buhid: {
    start: 0x1740,
    // no vowel-killer sign: offset 0x14 (U+1754) is not an assigned character
    viramaOffset: null,
    consonantMappingOffsets: {
      b: 0x0a,
      k: 0x03,
      d: 0x07,
      g: 0x04,
      h: 0x11,
      l: 0x0e,
      m: 0x0b,
      n: 0x08,
      ng: 0x05,
      p: 0x09,
      r: 0x0d,
      s: 0x10,
      t: 0x06,
      w: 0x0f,
      y: 0x0c,
    },
  },
  tagbanwa: {
    start: 0x1760,
    // no vowel-killer sign: offset 0x14 (U+1774) is not an assigned character
    viramaOffset: null,
    consonantMappingOffsets: {
      b: 0x0a,
      k: 0x03,
      d: 0x07,
      g: 0x04,
      // no letter for h: only the following vowel is written
      h: null,
      l: 0x0e,
      m: 0x0b,
      n: 0x08,
      ng: 0x05,
      p: 0x09,
      // r is written with the same letter as d
      r: 0x07,
      s: 0x10,
      t: 0x06,
      w: 0x0f,
      y: 0x0c,
    },
  },
}

// Offsets of the independent vowel letters, used when a vowel is not attached
// to a written consonant. e/i share one letter, as do o/u.
const initialVowelOffsets = {
  a: 0x00,
  e: 0x01,
  i: 0x01,
  o: 0x02,
  u: 0x02,
}

// Offsets of the dependent vowel signs that follow a consonant letter.
// `null` means no sign is written (the consonant letter alone is used for "a").
const vowels = {
  a: null,
  e: 0x12,
  i: 0x12,
  o: 0x13,
  u: 0x13,
}

/**
 * Lower-cases the input, strips or converts punctuation, and respells
 * borrowed Latin letters so that only letters with a native mapping remain.
 *
 * @param {string} text - raw Latin-script input
 * @returns {string} respelled text ready for syllable conversion
 */
const respellLatin = (text) => (
  text
    .toLowerCase()
    // special characters
    .replace(/[\]_^[}{@#$%&*()<>+=|"'\/-]/g, '')
    // sentence ending characters -> U+1736
    .replace(/[.!?]\s*/g, '\u1736')
    // sentence subdivision characters (breath marks) -> U+1735
    .replace(/[,:;]\s*/g, '\u1735')
    // remove double spacing
    .replace(/[ ][ ]+/g, ' ')
    // borrowed from Spanish, use pronunciation
    .replace(/c([aou])/g, 'k$1')
    .replace(/c([ei])/g, 's$1')
    .replace(/f/g, 'p')
    .replace(/ñ/g, 'ny')
    .replace(/j/g, 'h')
    // every occurrence must be respelled, hence the g flag on these three
    .replace(/qu([ei])/g, 'k$1')
    .replace(/qu([aou])/g, 'kuw$1')
    .replace(/v/g, 'b')
    .replace(/x/g, 'ks')
    .replace(/z/g, 's')
    // replace these special words
    .replace(/\bng\b/g, 'nang')
    .replace(/\bmga\b/g, 'manga')
)

/**
 * Converts romanized text to one of the scripts listed in `scripts`.
 *
 * @param {string} s - text to convert
 * @param {object} [options]
 * @param {string} [options.script='tagalog'] - key of `scripts` to convert to
 * @returns {string} the converted text; characters without a mapping are kept
 * @throws {TypeError} when `script` is not a key of `scripts`
 */
const toBaybayin = (s, { script = 'tagalog', } = {}) => {
  // Own-property check so inherited keys such as "constructor" are rejected too.
  if (!Object.prototype.hasOwnProperty.call(scripts, script)) {
    throw new TypeError(
      `Unsupported script "${String(script)}"; expected one of: ${Object.keys(scripts).join(', ')}`
    )
  }
  const { start, viramaOffset, consonantMappingOffsets, } = scripts[script]
  const glyph = (offset) => String.fromCodePoint(start + offset)

  return (
    respellLatin(s)
      // perform main conversion: a consonant plus an optional vowel
      .replace(/(ng|g|b|k|[rd]|h|l|m|n|p|s|t|w|y)(a|[ei]|[ou])?/g, (_, consonant, vowel) => {
        const consonantOffset = consonantMappingOffsets[consonant]
        if (consonantOffset === null) {
          // Silent consonant: write only the vowel, or nothing when there is none.
          return vowel === undefined ? '' : glyph(initialVowelOffsets[vowel])
        }
        const base = glyph(consonantOffset)
        if (vowel === undefined) {
          // Bare consonant: needs the vowel-killer; scripts without one omit it.
          return viramaOffset === null ? '' : `${base}${glyph(viramaOffset)}`
        }
        const signOffset = vowels[vowel]
        return signOffset === null ? base : `${base}${glyph(signOffset)}`
      })
      // remaining vowels are not attached to a consonant: use independent letters
      .replace(/(a|[ei]|[ou])/g, (_, vowel) => glyph(initialVowelOffsets[vowel]))
  )
}

export default toBaybayin
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment