-
-
Save benlangfeld/737463 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
# Transliterates user-submitted Russian (Cyrillic) text into another script.
#
# The target is selected by the +script+ request parameter:
# "en":: placeholder that simply returns the text reversed
# "pl":: Polish-style romanisation driven by POLISH_RULES
class TransliterationController < ApplicationController
  # Lower-case Russian vowels, consulted by the "after_vowels" and
  # "before_vowels" rules. Every entry is a Cyrillic code point.
  RUSSIAN_VOWELS = %w[а о и у ы э я е ё ю].freeze

  # Russian -> Polish rules, keyed by lower-case Cyrillic letter.
  #
  # Keys understood inside each rule:
  # "replace_with"::  default replacement (always present)
  # "begins_with"::   replacement when no known letter precedes this one
  # "ends_with"::     replacement when no known letter follows this one
  # "after_vowels"::  replacement when the previous letter is a vowel
  # "before_vowels":: replacement when the next letter is a vowel
  # "after"::         previous letter => replacement
  # "before"::        next letter => replacement
  POLISH_RULES = {
    "а" => { "replace_with" => "a" },
    "б" => { "replace_with" => "b" },
    "в" => { "replace_with" => "w" },
    "г" => { "replace_with" => "g" },
    "д" => { "replace_with" => "d" },
    # NOTE: this key must be Cyrillic "е" (U+0435), not the look-alike
    # Latin "e", otherwise the rule can never match Russian input.
    "е" => { "replace_with" => "ie",
             "begins_with" => "je",
             "after_vowels" => "je",
             "after" => { "ъ" => "je", "ь" => "je", "ж" => "e", "л" => "e",
                          "ц" => "e", "ч" => "e", "ш" => "e", "щ" => "e" } },
    "ё" => { "replace_with" => "io",
             "begins_with" => "jo",
             "after_vowels" => "jo",
             "after" => { "ъ" => "jo", "ь" => "jo", "ж" => "o", "л" => "o",
                          "ц" => "o", "ч" => "o", "ш" => "o", "щ" => "o" } },
    "ж" => { "replace_with" => "ż" },
    "з" => { "replace_with" => "z" },
    "и" => { "replace_with" => "i",
             "after" => { "ь" => "ji", "ж" => "y", "ц" => "y", "ш" => "y" } },
    "й" => { "replace_with" => "j" },
    "к" => { "replace_with" => "k" },
    "л" => { "replace_with" => "ł",
             "before" => { "е" => "l", "ё" => "l", "и" => "l",
                           "ь" => "l", "ю" => "l", "я" => "l" } },
    "м" => { "replace_with" => "m" },
    "н" => { "replace_with" => "n" },
    "о" => { "replace_with" => "o" },
    "п" => { "replace_with" => "p" },
    "р" => { "replace_with" => "r" },
    "с" => { "replace_with" => "s" },
    "т" => { "replace_with" => "t" },
    "у" => { "replace_with" => "u" },
    "ф" => { "replace_with" => "f" },
    "х" => { "replace_with" => "ch" },
    "ц" => { "replace_with" => "c" },
    "ч" => { "replace_with" => "cz" },
    "ш" => { "replace_with" => "sz" },
    "щ" => { "replace_with" => "szcz" },
    "ъ" => { "replace_with" => "" },
    "ы" => { "replace_with" => "y" },
    "ь" => { "replace_with" => "´",
             "before_vowels" => "",
             "after" => { "л" => "", "ж" => "", "ш" => "", "ч" => "", "щ" => "" } },
    "э" => { "replace_with" => "e" },
    "ю" => { "replace_with" => "iu",
             "begins_with" => "ju",
             "after_vowels" => "ju",
             "after" => { "ъ" => "ju", "ь" => "ju", "л" => "u" } },
    "я" => { "replace_with" => "ia",
             "begins_with" => "ja",
             "after_vowels" => "ja",
             "after" => { "ъ" => "ja", "ь" => "ja", "л" => "a" } }
  }.freeze

  # Empty action; presumably relies on the framework's implicit template
  # rendering to show the input form.
  def view
  end

  # On POST, resets @errors and stores the transliterated text in @result.
  # Any other request method leaves both instance variables untouched.
  def result
    return unless request.post?

    @errors = []
    @result = transliterate(:script, :text)
  end

  private

  # Transliterates the text found in the request parameters.
  #
  # script - key into +params+ holding the target script ("en" or "pl")
  # text   - key into +params+ holding the text to convert
  #
  # Returns the converted String. Returns nil, after appending a message to
  # @errors, when a parameter is missing, the script is not supported, or
  # the conversion raises. Runs of whitespace in the input are collapsed to
  # single spaces because the text is split into words and re-joined.
  def transliterate(script, text)
    if params[script].blank? || params[text].blank?
      @errors << "#{script} or #{text} missing"
      return nil
    end

    case params[script]
    when "en"
      params[text].reverse # just for now ;)
    when "pl"
      @rules = POLISH_RULES
      params[text].split(" ").map { |word| transliterate_word(word) }.join(" ")
    else
      @errors << "#{script} not supported"
      nil
    end
  rescue StandardError => e
    # StandardError only: never swallow signals, exits or out-of-memory errors.
    @errors << e.message
    nil
  end

  # Converts one whitespace-free word, letter by letter, using @rules.
  def transliterate_word(word)
    letters = word.each_char.to_a
    converted = letters.each_with_index.map do |current, index|
      # A negative index would wrap around to the end of the word, so the
      # first letter must explicitly have no predecessor.
      previous = index.zero? ? nil : letters[index - 1]
      following = letters[index + 1] # nil for the last letter
      transliterate_letter(previous, current, following)
    end
    converted.join
  end

  # Picks the rule set to apply to +current+ depending on whether its
  # neighbours are letters known to @rules. Characters without a rule are
  # returned unchanged.
  def transliterate_letter(previous, current, following)
    pl = fold(previous)
    cl = fold(current)
    nl = fold(following)
    return current unless @rules.key?(cl)

    prev_known = @rules.key?(pl)
    next_known = @rules.key?(nl)

    if !prev_known && !next_known
      # isolated letter: plain replacement only
      proper_case(current, cl, @rules[cl]["replace_with"])
    elsif !prev_known
      # starts a run of known letters
      check_before(current, following, cl, nl)
    elsif !next_known
      # ends a run of known letters
      check_after(previous, current, pl, cl)
    else
      # somewhere in the middle
      check_all(previous, current, following, pl, cl, nl)
    end
  end

  # Normalised (NFKC), lower-cased form of +char+ used for rule lookups;
  # nil stays nil so that missing neighbours never match a rule.
  def fold(char)
    char && Unicode.downcase(Unicode.normalize_KC(char))
  end

  # Replacement for a letter that ends a run: tries "ends_with", then
  # "after_vowels", then the "after" table, then "replace_with".
  #
  # c/cl   - current letter, original and lower-cased
  # pl     - lower-cased previous letter
  def check_after(_p, c, pl, cl)
    rule = @rules[cl]
    replacement = rule["ends_with"] ||
                  (rule["after_vowels"] if vowel?(pl)) ||
                  (rule["after"] && rule["after"][pl]) ||
                  rule["replace_with"]
    proper_case(c, cl, replacement)
  end

  # Replacement for a letter that starts a run: tries "begins_with", then
  # "before_vowels", then the "before" table, then "replace_with".
  #
  # c/cl   - current letter, original and lower-cased
  # nl     - lower-cased next letter
  def check_before(c, _n, cl, nl)
    rule = @rules[cl]
    replacement = rule["begins_with"] ||
                  (rule["before_vowels"] if vowel?(nl)) ||
                  (rule["before"] && rule["before"][nl]) ||
                  rule["replace_with"]
    proper_case(c, cl, replacement)
  end

  # Replacement for a letter with known neighbours on both sides. The
  # "after" rules take precedence over the "before" rules; "replace_with"
  # is the fallback.
  def check_all(_p, c, _n, pl, cl, nl)
    rule = @rules[cl]
    replacement = (rule["after_vowels"] if vowel?(pl)) ||
                  (rule["after"] && rule["after"][pl]) ||
                  (rule["before_vowels"] if vowel?(nl)) ||
                  (rule["before"] && rule["before"][nl]) ||
                  rule["replace_with"]
    proper_case(c, cl, replacement)
  end

  # True when +sign+ (already lower-cased) is a Russian vowel.
  def vowel?(sign)
    RUSSIAN_VOWELS.include?(sign)
  end

  # Returns +trans+ as is when the source letter was lower case (+orig+
  # equals +downcase+), otherwise returns it capitalised.
  def proper_case(orig, downcase, trans)
    return trans if orig == downcase

    Unicode.capitalize(Unicode.normalize_KC(trans))
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment