Created
December 11, 2010 16:48
-
-
Save Holek/737460 to your computer and use it in GitHub Desktop.
Transliteration controller with Unicode class problem
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
# Transliterates user-submitted text.
#
# Supported values of params[:script]:
#   "en" - ASCII approximation of the text via Iconv.
#   "pl" - Polish-style transliteration of Cyrillic text using POLISH_RULES.
# Any other value yields a nil @result.
#
# NOTE(review): relies on the `unicode` gem (Unicode.downcase / normalize_KC /
# capitalize), on Iconv and on ActiveSupport's String#mb_chars being loaded
# elsewhere in the application - none of them is required in this file.
class TransliterationController < ApplicationController
  # Cyrillic letters treated as vowels by the "after_vowels"/"before_vowels"
  # rules. All entries, including "е" (U+0435), must be Cyrillic code points;
  # a Latin look-alike would never match real Cyrillic input.
  RUSSIAN_VOWELS = ["а", "о", "и", "у", "ы", "э", "я", "е", "ё", "ю"].freeze

  # Cyrillic -> Polish replacement rules, keyed by the lowercase Cyrillic letter.
  # Each rule hash may contain:
  #   "replace_with"  - default replacement (always present)
  #   "begins_with"   - replacement when the letter starts a word
  #   "ends_with"     - replacement when the letter ends a word (no rule defines it yet)
  #   "after_vowels"  - replacement when the previous letter is a vowel
  #   "before_vowels" - replacement when the next letter is a vowel
  #   "after"         - { previous letter => replacement }
  #   "before"        - { next letter => replacement }
  POLISH_RULES = {
    "а" => {"replace_with" => "a"},
    "б" => {"replace_with" => "b"},
    "в" => {"replace_with" => "w"},
    "г" => {"replace_with" => "g"},
    "д" => {"replace_with" => "d"},
    # The key below is Cyrillic "е" (U+0435), not Latin "e".
    "е" => {"replace_with" => "ie",
            "begins_with" => "je",
            "after_vowels" => "je",
            "after" => {
              "ъ" => "je",
              "ь" => "je",
              "ж" => "e",
              "л" => "e",
              "ц" => "e",
              "ч" => "e",
              "ш" => "e",
              "щ" => "e"
            }
    },
    "ё" => {"replace_with" => "io",
            "begins_with" => "jo",
            "after_vowels" => "jo",
            "after" => {
              "ъ" => "jo",
              "ь" => "jo",
              "ж" => "o",
              "л" => "o",
              "ц" => "o",
              "ч" => "o",
              "ш" => "o",
              "щ" => "o"
            }
    },
    "ж" => {"replace_with" => "ż"},
    "з" => {"replace_with" => "z"},
    "и" => {"replace_with" => "i",
            "after" => {
              "ь" => "ji",
              "ж" => "y",
              "ц" => "y",
              "ш" => "y"
            }
    },
    "й" => {"replace_with" => "j"},
    "к" => {"replace_with" => "k"},
    "л" => {"replace_with" => "ł",
            "before" => {
              "е" => "l",
              "ё" => "l",
              "и" => "l",
              "ь" => "l",
              "ю" => "l",
              "я" => "l"
            }
    },
    "м" => {"replace_with" => "m"},
    "н" => {"replace_with" => "n"},
    "о" => {"replace_with" => "o"},
    "п" => {"replace_with" => "p"},
    "р" => {"replace_with" => "r"},
    "с" => {"replace_with" => "s"},
    "т" => {"replace_with" => "t"},
    "у" => {"replace_with" => "u"},
    "ф" => {"replace_with" => "f"},
    "х" => {"replace_with" => "ch"},
    "ц" => {"replace_with" => "c"},
    "ч" => {"replace_with" => "cz"},
    "ш" => {"replace_with" => "sz"},
    "щ" => {"replace_with" => "szcz"},
    "ъ" => {"replace_with" => ""},
    "ы" => {"replace_with" => "y"},
    "ь" => {"replace_with" => "´",
            "before_vowels" => '',
            "after" => {
              "л" => '',
              "ж" => '',
              "ш" => '',
              "ч" => '',
              "щ" => ''
            }
    },
    "э" => {"replace_with" => "e"},
    "ю" => {"replace_with" => "iu",
            "begins_with" => "ju",
            "after_vowels" => "ju",
            "after" => {
              "ъ" => "ju",
              "ь" => "ju",
              "л" => "u"
            }
    },
    "я" => {"replace_with" => "ia",
            "begins_with" => "ja",
            "after_vowels" => "ja",
            "after" => {
              "ъ" => "ja",
              "ь" => "ja",
              "л" => "a"
            }
    }
  }.freeze

  # Renders the input form; no controller-side work is done here.
  def view
  end

  # On POST, transliterates params[:text] according to params[:script].
  # Sets @errors (array of messages) and @result (String, or nil on failure
  # or for an unrecognised script).
  def result
    return unless request.post?

    @errors = []
    @result = transliterate(:script, :text)
  end

  private

  # Transliterates params[text] according to params[script].
  #
  # script - params key holding the target script ("en" or "pl")
  # text   - params key holding the text to convert
  #
  # Returns the converted String; nil when a parameter is missing, when the
  # script is not recognised, or when conversion raised (the message is then
  # appended to @errors). Previously the error paths returned the @errors array
  # itself, which ended up in @result.
  def transliterate(script, text)
    if params[script].blank? || params[text].blank?
      @errors << "#{script} or #{text} missing"
      return nil
    end

    # Kept as instance variables because the helpers below (and possibly
    # views) read them.
    @russianVowels = RUSSIAN_VOWELS

    case params[script]
    when "en"
      # Iconv.iconv returns an Array of converted chunks; join them directly.
      Iconv.iconv('ascii//ignore//translit', 'utf-8', params[text]).join
    when "pl"
      @t = POLISH_RULES
      # Words are split on whitespace and re-joined with single spaces.
      params[text].split(" ").map { |word| transliterate_word(word) }.join(' ')
    end
  rescue StandardError => e
    # StandardError rather than Exception so signals/exit are not swallowed.
    @errors << e.message
    nil
  end

  # Transliterates a single whitespace-free word using the rules in @t.
  # Characters without a rule are copied through unchanged and also act as
  # word boundaries for their neighbours.
  def transliterate_word(word)
    chars = word.mb_chars
    (0...chars.length).map do |i|
      # Index -1 would wrap around to the last character, so the first
      # character explicitly has no predecessor.
      prev = i.zero? ? nil : chars[i - 1]
      c    = chars[i].to_s
      nxt  = chars[i + 1] # nil past the end of the word

      # lowercase, normalised forms used as keys into @t
      pl = table_key(prev)
      cl = table_key(c)
      nl = table_key(nxt)

      if @t[cl].nil?
        c # no rule for this character: keep it as typed
      elsif @t[pl].nil? && @t[nl].nil?
        proper_case(c, cl, @t[cl]["replace_with"]) # isolated letter
      elsif @t[pl].nil?
        check_before(c, nxt, cl, nl)               # letter starts the word
      elsif @t[nl].nil?
        check_after(prev, c, pl, cl)               # letter ends the word
      else
        check_all(prev, c, nxt, pl, cl, nl)        # letter in the middle
      end
    end.join
  end

  # Returns the NFKC-normalised, lowercased form of char for use as a key in
  # @t, or nil when there is no character. The value is converted to a plain
  # String first - presumably the Unicode gem accepts only String arguments;
  # verify against the gem version in use.
  def table_key(char)
    return nil if char.nil?

    Unicode.downcase(Unicode.normalize_KC(char.to_s))
  end

  # Replacement for a word-final letter.
  # Precedence: "ends_with", then "after_vowels"/"after", then "replace_with".
  #
  # p, c   - previous and current character as typed (p is unused)
  # pl, cl - their lowercase lookup keys
  def check_after(p, c, pl, cl)
    rule = @t[cl]
    proper_case(c, cl, rule["ends_with"] || after_rule(rule, pl) || rule["replace_with"])
  end

  # Replacement for a word-initial letter.
  # Precedence: "begins_with", then "before_vowels"/"before", then "replace_with".
  #
  # c, n   - current and next character as typed (n is unused)
  # cl, nl - their lowercase lookup keys
  def check_before(c, n, cl, nl)
    rule = @t[cl]
    proper_case(c, cl, rule["begins_with"] || before_rule(rule, nl) || rule["replace_with"])
  end

  # Replacement for a letter with table letters on both sides.
  # Precedence: "after_vowels", "after", "before_vowels", "before", "replace_with".
  def check_all(p, c, n, pl, cl, nl)
    rule = @t[cl]
    proper_case(c, cl, after_rule(rule, pl) || before_rule(rule, nl) || rule["replace_with"])
  end

  # Replacement dictated by the previous letter, or nil when none applies.
  # An empty-string replacement is truthy in Ruby and therefore still wins.
  def after_rule(rule, pl)
    return rule["after_vowels"] if rule["after_vowels"] && is_vowel(pl)

    rule["after"] && rule["after"][pl]
  end

  # Replacement dictated by the next letter, or nil when none applies.
  def before_rule(rule, nl)
    return rule["before_vowels"] if rule["before_vowels"] && is_vowel(nl)

    rule["before"] && rule["before"][nl]
  end

  # True when sign (a lowercase lookup key, possibly nil) is listed in
  # @russianVowels.
  def is_vowel(sign)
    @russianVowels.include?(sign)
  end

  # Returns trans unchanged when the original character was already lowercase
  # (orig equals its lowercase key); otherwise returns trans capitalised so
  # the output keeps the input's casing.
  def proper_case(orig, downcase, trans)
    if orig == downcase
      trans
    else
      Unicode.capitalize(Unicode.normalize_KC(trans))
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment