# Source: GitHub gist jamesp/84194, created March 24, 2009 16:30.
# GitHub flagged this file as containing hidden or bidirectional Unicode text that may be
# interpreted or compiled differently than it appears; review it in an editor that reveals
# hidden Unicode characters.
# Tone digits used in numbered pinyin, mapped to a symbolic name for each tone.
# Frozen so the lookup table cannot be modified by accident.
Tones = {
  1 => :flat,
  2 => :up,
  3 => :downup,
  4 => :down,
  5 => :notone
}.freeze
# Vowel groups recognised in a syllable, written in ASCII where "v" stands for
# u-umlaut. Some groups are listed under two spellings; those pairs are marked
# as synonyms. Frozen so the list cannot be modified by accident.
Vowels = [
  'a', 'ai', 'an', 'ao', 'ang',
  'o', 'ou', 'ong',
  'e', 'ei', 'en', 'er', 'eng',
  'i', 'ia', 'ie', 'iao',
  'iou', 'iu', # synonyms
  'ian', 'in', 'iang', 'ing', 'iong',
  'u', 'ua', 'uo', 'uai',
  'uei', 'ui', # synonyms
  'uan',
  'uen', 'un', # synonyms
  'uang', 'ueng',
  'v', 've', 'van', 'vn'
].freeze
# Consonant letters and digraphs that can begin a syllable.
# Frozen so the list cannot be modified by accident.
Consonants = %w[
  b p m f
  d t n l
  g k h
  j q x
  zh ch sh r
  z c s
].freeze
# Ordered rewrite rules that move a trailing tone digit so that it sits directly
# after the vowel that carries the tone mark.
#
# Each rule is a [pattern, template] pair: the pattern is regular-expression
# source text, and the template refers to the pattern's capture groups as $1,
# $2, ... Order matters - specific rules must come before the generic ones.
Converters = [
  # Two-vowel groups whose SECOND vowel carries the mark. These must precede the
  # generic two-vowel rule, which marks the first vowel.
  ['ie([1-5])', 'ie$1'],
  ['ia([1-5])', 'ia$1'],
  ['iu([1-5])', 'iu$1'],
  ['ua([1-5])', 'ua$1'],
  ['uo([1-5])', 'uo$1'],
  ['ue([1-5])', 'ue$1'],
  ['ui([1-5])', 'ui$1'],
  ['ve([1-5])', 've$1'],
  # Nasal endings: the digit moves in front of the n / ng.
  ['([aeiouv])n([1-5])', '$1$2n'],
  ['([aeiouv])ng([1-5])', '$1$2ng'],
  # Three vowels: the digit goes after the second one.
  ['([aeiouv][aeiouv])([aeiouv])([1-5])', '$1$3$2'],
  # Two vowels (generic): the digit goes after the first one.
  ['([aeiouv])([aeiouv])([1-5])', '$1$3$2'],
  # Already in final position, or no tone digit at all: left unchanged.
  ['([aeiouv][1-5])', '$1'],
  ['([aeiouv])', '$1']
].map(&:freeze).freeze
# Maps a vowel followed by its tone digit to the accented character to output.
# "v" stands for u-umlaut. Tone 5 carries no accent, so those entries simply
# drop the digit.
#
# The bare 'v' entry is deliberately LAST: a consumer that substitutes entries
# in order must handle 'v1'..'v5' before it rewrites every remaining 'v'.
Unicode_characters = {
  'a1' => 'ā', 'a2' => 'á', 'a3' => 'ǎ', 'a4' => 'à', 'a5' => 'a',
  'e1' => 'ē', 'e2' => 'é', 'e3' => 'ě', 'e4' => 'è', 'e5' => 'e',
  'i1' => 'ī', 'i2' => 'í', 'i3' => 'ǐ', 'i4' => 'ì', 'i5' => 'i',
  'o1' => 'ō', 'o2' => 'ó', 'o3' => 'ǒ', 'o4' => 'ò', 'o5' => 'o',
  'u1' => 'ū', 'u2' => 'ú', 'u3' => 'ǔ', 'u4' => 'ù', 'u5' => 'u',
  'v1' => 'ǖ', 'v2' => 'ǘ', 'v3' => 'ǚ', 'v4' => 'ǜ', 'v5' => 'ü',
  'v' => 'ü'
}.freeze
# Moves every tone digit in +from+ so that it directly follows the vowel that
# carries the tone mark, e.g. "guan1" -> "gua1n". No accents are applied here.
#
# The text is scanned once, left to right. At each position the first rule in
# Converters that matches wins, and the converted text is never rescanned, so a
# later rule cannot re-shuffle a syllable an earlier rule already handled.
# Characters matched by no rule (consonants, spaces, '+', ...) pass through
# unchanged.
#
# @param from [String] numbered pinyin; not modified
# @return [String] a new string with the tone digits repositioned
def partial_convert(from)
  # Whole-match versions of each rule, used to recover which rule produced a hit.
  rules = Converters.map { |pattern, template| [Regexp.new("\\A(?:#{pattern})\\z"), template] }
  # Alternation keeps the rule order, which is what gives "first rule wins".
  any_rule = Regexp.union(Converters.map { |pattern, _template| Regexp.new(pattern) })

  from.gsub(any_rule) { |hit| expand_converter(rules, hit) }
end

# Rewrites +hit+ with the template of the first rule that matches it entirely.
# Templates name capture groups as $1, $2, ...; Ruby's gsub does not understand
# that notation, so the references are expanded by hand.
def expand_converter(rules, hit)
  rules.each do |anchored, template|
    groups = anchored.match(hit)
    next unless groups

    return template.gsub(/\$(\d)/) { groups[Regexp.last_match(1).to_i].to_s }
  end
  hit
end
# Converts numbered pinyin (e.g. "mei2 guan1xi") to its accented form.
#
# partial_convert first moves each tone digit next to the vowel it belongs to;
# every key of Unicode_characters found in that result is then replaced by its
# mapped value.
#
# All replacements happen in ONE pass with longer keys tried first, so the
# result does not depend on the hash's order (bare "v" can no longer pre-empt
# "v1") and replaced text is never scanned again.
#
# @param from [String] numbered pinyin
# @return [String] the converted text
def to_unicode(from)
  longest_first = Unicode_characters.keys.sort_by { |key| -key.length }
  partial_convert(from).gsub(Regexp.union(longest_first), Unicode_characters)
end
# Demo: print the accented form of a sample numbered-pinyin phrase.
puts to_unicode('mei2 guan1xi')
# (GitHub page footer: sign up for free or sign in to join the conversation on this gist.)