benlangfeld · December 11, 2010 16:53
diff --git a/transliteration_controller.rb b/transliteration_controller.rb
 # coding: utf-8
 # Transliterates text taken from request params into another script.
 #
 # Two scripts are handled: "en" (placeholder: reverses the text) and "pl"
 # (Cyrillic -> Polish spelling, driven by POLISH_RULES). The "pl" path
 # relies on String#mb_chars and on the Unicode module's downcase,
 # capitalize and normalize_KC functions, exactly like the original code.
 class TransliterationController < ApplicationController
   # Letters treated as vowels by the "after_vowels" / "before_vowels" rules.
   # NOTE(review): "е" must be the Cyrillic letter (U+0435) to match input
   # text; the original literal looked like a Latin "e" - confirm.
   RUSSIAN_VOWELS = ["а", "о", "и", "у", "ы", "э", "я", "е", "ё", "ю"].freeze

   # Per-letter rules, keyed by the lower-case Cyrillic letter.
   #   "replace_with"  - default replacement
   #   "begins_with"   - replacement when the letter starts a word
   #   "ends_with"     - replacement when the letter ends a word
   #   "after_vowels"  - replacement when the previous letter is a vowel
   #   "before_vowels" - replacement when the next letter is a vowel
   #   "after"         - replacements keyed by the previous letter
   #   "before"        - replacements keyed by the next letter
   # An empty string is a valid replacement (it drops the letter).
   POLISH_RULES = {
     "а" => { "replace_with" => "a" },
     "б" => { "replace_with" => "b" },
     "в" => { "replace_with" => "w" },
     "г" => { "replace_with" => "g" },
     "д" => { "replace_with" => "d" },
     # NOTE(review): key written as Cyrillic "е" (U+0435); the original
     # literal looked like a Latin "e", which would never match - confirm.
     "е" => { "replace_with" => "ie",
              "begins_with" => "je",
              "after_vowels" => "je",
              "after" => { "ъ" => "je", "ь" => "je", "ж" => "e", "л" => "e",
                           "ц" => "e", "ч" => "e", "ш" => "e", "щ" => "e" } },
     "ё" => { "replace_with" => "io",
              "begins_with" => "jo",
              "after_vowels" => "jo",
              "after" => { "ъ" => "jo", "ь" => "jo", "ж" => "o", "л" => "o",
                           "ц" => "o", "ч" => "o", "ш" => "o", "щ" => "o" } },
     "ж" => { "replace_with" => "ż" },
     "з" => { "replace_with" => "z" },
     "и" => { "replace_with" => "i",
              "after" => { "ь" => "ji", "ж" => "y", "ц" => "y", "ш" => "y" } },
     "й" => { "replace_with" => "j" },
     "к" => { "replace_with" => "k" },
     "л" => { "replace_with" => "ł",
              "before" => { "е" => "l", "ё" => "l", "и" => "l",
                            "ь" => "l", "ю" => "l", "я" => "l" } },
     "м" => { "replace_with" => "m" },
     "н" => { "replace_with" => "n" },
     "о" => { "replace_with" => "o" },
     "п" => { "replace_with" => "p" },
     "р" => { "replace_with" => "r" },
     "с" => { "replace_with" => "s" },
     "т" => { "replace_with" => "t" },
     "у" => { "replace_with" => "u" },
     "ф" => { "replace_with" => "f" },
     "х" => { "replace_with" => "ch" },
     "ц" => { "replace_with" => "c" },
     "ч" => { "replace_with" => "cz" },
     "ш" => { "replace_with" => "sz" },
     "щ" => { "replace_with" => "szcz" },
     "ъ" => { "replace_with" => "" },
     "ы" => { "replace_with" => "y" },
     "ь" => { "replace_with" => "´",
              "before_vowels" => "",
              "after" => { "л" => "", "ж" => "", "ш" => "", "ч" => "", "щ" => "" } },
     "э" => { "replace_with" => "e" },
     "ю" => { "replace_with" => "iu",
              "begins_with" => "ju",
              "after_vowels" => "ju",
              "after" => { "ъ" => "ju", "ь" => "ju", "л" => "u" } },
     "я" => { "replace_with" => "ia",
              "begins_with" => "ja",
              "after_vowels" => "ja",
              "after" => { "ъ" => "ja", "ь" => "ja", "л" => "a" } }
   }.freeze

   # Empty action: sets no instance variables.
   def view
   end

   # On POST, resets @errors and stores the transliteration of
   # params[:text] (for the script named by params[:script]) in @result.
   # Does nothing for other request methods.
   def result
     if request.post?
       @errors = []
       @result = transliterate(:script, :text)
     end
   end

   private

   # Transliterates params[text] according to params[script].
   #
   # script, text - keys into params (not the values themselves).
   #
   # Returns the transliterated String. Returns nil when a parameter is
   # blank or an error is raised (a message is appended to @errors in both
   # cases), and also when the script is neither "en" nor "pl".
   def transliterate(script, text)
     if params[script].blank? || params[text].blank?
       @errors << "#{script} or #{text} missing"
       return nil
     end

     case params[script]
     when "en"
       params[text].reverse # just for now ;)
     when "pl"
       # The check_* helpers and is_vowel read these instance variables.
       @russianVowels = RUSSIAN_VOWELS
       @t = POLISH_RULES
       # Words are split on whitespace and re-joined with single spaces.
       params[text].split(" ").map { |word| transliterate_word(word) }.join(" ")
     end
   rescue StandardError => e
     # StandardError only: signals and SystemExit must not be swallowed.
     @errors << e.message
     nil
   end

   # Transliterates one whitespace-free word, character by character.
   def transliterate_word(word)
     chars = word.mb_chars
     last = chars.length - 1
     (0..last).map do |x|
       # Neighbours are nil at the word edges. The explicit bounds checks
       # matter: index -1 would otherwise wrap around to the last character.
       prev = x > 0 ? chars[x - 1].to_s : nil
       cur = chars[x].to_s
       nxt = x < last ? chars[x + 1].to_s : nil
       transliterate_char(prev, cur, nxt)
     end.join
   end

   # Picks the rule set to apply to +cur+ based on whether its neighbours
   # are letters known to @t. Characters without an entry in @t are
   # returned unchanged.
   def transliterate_char(prev, cur, nxt)
     pl = lookup_key(prev)
     cl = lookup_key(cur)
     nl = lookup_key(nxt)

     rule = @t[cl]
     return cur if rule.nil?

     starts_word = @t[pl].nil? # no translatable letter before
     ends_word = @t[nl].nil?   # no translatable letter after

     if starts_word && ends_word
       proper_case(cur, cl, rule["replace_with"]) # one-letter word
     elsif starts_word
       check_before(cur, nxt, cl, nl)
     elsif ends_word
       check_after(prev, cur, pl, cl)
     else
       check_all(prev, cur, nxt, pl, cl, nl)
     end
   end

   # Normalized, lower-cased form of +char+ used as a key into @t;
   # nil stays nil so that edge-of-word neighbours simply miss the table.
   def lookup_key(char)
     char && Unicode::downcase(Unicode::normalize_KC(char))
   end

   # Replacement for a word-final letter.
   # p/c are the previous/current characters, pl/cl their lookup keys.
   # Precedence: "ends_with", "after_vowels", "after", "replace_with".
   def check_after(p, c, pl, cl)
     rule = @t[cl]
     trans =
       if !rule["ends_with"].nil?
         rule["ends_with"]
       elsif !rule["after_vowels"].nil? && is_vowel(pl)
         rule["after_vowels"]
       elsif !rule["after"].nil? && !rule["after"][pl].nil?
         rule["after"][pl]
       else
         rule["replace_with"]
       end
     proper_case(c, cl, trans)
   end

   # Replacement for a word-initial letter.
   # c/n are the current/next characters, cl/nl their lookup keys.
   # Precedence: "begins_with", "before_vowels", "before", "replace_with".
   def check_before(c, n, cl, nl)
     rule = @t[cl]
     trans =
       if !rule["begins_with"].nil?
         rule["begins_with"]
       elsif !rule["before_vowels"].nil? && is_vowel(nl)
         rule["before_vowels"]
       elsif !rule["before"].nil? && !rule["before"][nl].nil?
         rule["before"][nl]
       else
         rule["replace_with"]
       end
     proper_case(c, cl, trans)
   end

   # Replacement for a letter in the middle of a word.
   # Precedence: "after_vowels", "after", "before_vowels", "before",
   # "replace_with".
   def check_all(p, c, n, pl, cl, nl)
     rule = @t[cl]
     trans =
       if !rule["after_vowels"].nil? && is_vowel(pl)
         rule["after_vowels"]
       elsif !rule["after"].nil? && !rule["after"][pl].nil?
         rule["after"][pl]
       elsif !rule["before_vowels"].nil? && is_vowel(nl)
         rule["before_vowels"]
       elsif !rule["before"].nil? && !rule["before"][nl].nil?
         rule["before"][nl]
       else
         rule["replace_with"]
       end
     proper_case(c, cl, trans)
   end

   # True when +sign+ (a lower-cased character, or nil) is listed in
   # @russianVowels.
   def is_vowel(sign)
     @russianVowels.include?(sign)
   end

   # Returns +trans+ as is when the original character was already lower
   # case (orig == downcase); otherwise returns it capitalized.
   def proper_case(orig, downcase, trans)
     if orig == downcase
       trans
     else
       Unicode::capitalize(Unicode::normalize_KC(trans))
     end
   end
 end
	# coding: utf-8
	# Transliterates text taken from request params into another script.
	#
	# Two scripts are handled: "en" (placeholder: reverses the text) and "pl"
	# (Cyrillic -> Polish spelling, driven by POLISH_RULES). The "pl" path
	# relies on String#mb_chars and on the Unicode module's downcase,
	# capitalize and normalize_KC functions, exactly like the original code.
	class TransliterationController < ApplicationController
	  # Letters treated as vowels by the "after_vowels" / "before_vowels" rules.
	  # NOTE(review): "е" must be the Cyrillic letter (U+0435) to match input
	  # text; the original literal looked like a Latin "e" - confirm.
	  RUSSIAN_VOWELS = ["а", "о", "и", "у", "ы", "э", "я", "е", "ё", "ю"].freeze

	  # Per-letter rules, keyed by the lower-case Cyrillic letter.
	  #   "replace_with"  - default replacement
	  #   "begins_with"   - replacement when the letter starts a word
	  #   "ends_with"     - replacement when the letter ends a word
	  #   "after_vowels"  - replacement when the previous letter is a vowel
	  #   "before_vowels" - replacement when the next letter is a vowel
	  #   "after"         - replacements keyed by the previous letter
	  #   "before"        - replacements keyed by the next letter
	  # An empty string is a valid replacement (it drops the letter).
	  POLISH_RULES = {
	    "а" => { "replace_with" => "a" },
	    "б" => { "replace_with" => "b" },
	    "в" => { "replace_with" => "w" },
	    "г" => { "replace_with" => "g" },
	    "д" => { "replace_with" => "d" },
	    # NOTE(review): key written as Cyrillic "е" (U+0435); the original
	    # literal looked like a Latin "e", which would never match - confirm.
	    "е" => { "replace_with" => "ie",
	             "begins_with" => "je",
	             "after_vowels" => "je",
	             "after" => { "ъ" => "je", "ь" => "je", "ж" => "e", "л" => "e",
	                          "ц" => "e", "ч" => "e", "ш" => "e", "щ" => "e" } },
	    "ё" => { "replace_with" => "io",
	             "begins_with" => "jo",
	             "after_vowels" => "jo",
	             "after" => { "ъ" => "jo", "ь" => "jo", "ж" => "o", "л" => "o",
	                          "ц" => "o", "ч" => "o", "ш" => "o", "щ" => "o" } },
	    "ж" => { "replace_with" => "ż" },
	    "з" => { "replace_with" => "z" },
	    "и" => { "replace_with" => "i",
	             "after" => { "ь" => "ji", "ж" => "y", "ц" => "y", "ш" => "y" } },
	    "й" => { "replace_with" => "j" },
	    "к" => { "replace_with" => "k" },
	    "л" => { "replace_with" => "ł",
	             "before" => { "е" => "l", "ё" => "l", "и" => "l",
	                           "ь" => "l", "ю" => "l", "я" => "l" } },
	    "м" => { "replace_with" => "m" },
	    "н" => { "replace_with" => "n" },
	    "о" => { "replace_with" => "o" },
	    "п" => { "replace_with" => "p" },
	    "р" => { "replace_with" => "r" },
	    "с" => { "replace_with" => "s" },
	    "т" => { "replace_with" => "t" },
	    "у" => { "replace_with" => "u" },
	    "ф" => { "replace_with" => "f" },
	    "х" => { "replace_with" => "ch" },
	    "ц" => { "replace_with" => "c" },
	    "ч" => { "replace_with" => "cz" },
	    "ш" => { "replace_with" => "sz" },
	    "щ" => { "replace_with" => "szcz" },
	    "ъ" => { "replace_with" => "" },
	    "ы" => { "replace_with" => "y" },
	    "ь" => { "replace_with" => "´",
	             "before_vowels" => "",
	             "after" => { "л" => "", "ж" => "", "ш" => "", "ч" => "", "щ" => "" } },
	    "э" => { "replace_with" => "e" },
	    "ю" => { "replace_with" => "iu",
	             "begins_with" => "ju",
	             "after_vowels" => "ju",
	             "after" => { "ъ" => "ju", "ь" => "ju", "л" => "u" } },
	    "я" => { "replace_with" => "ia",
	             "begins_with" => "ja",
	             "after_vowels" => "ja",
	             "after" => { "ъ" => "ja", "ь" => "ja", "л" => "a" } }
	  }.freeze

	  # Empty action: sets no instance variables.
	  def view
	  end

	  # On POST, resets @errors and stores the transliteration of
	  # params[:text] (for the script named by params[:script]) in @result.
	  # Does nothing for other request methods.
	  def result
	    if request.post?
	      @errors = []
	      @result = transliterate(:script, :text)
	    end
	  end

	  private

	  # Transliterates params[text] according to params[script].
	  #
	  # script, text - keys into params (not the values themselves).
	  #
	  # Returns the transliterated String. Returns nil when a parameter is
	  # blank or an error is raised (a message is appended to @errors in both
	  # cases), and also when the script is neither "en" nor "pl".
	  def transliterate(script, text)
	    if params[script].blank? || params[text].blank?
	      @errors << "#{script} or #{text} missing"
	      return nil
	    end

	    case params[script]
	    when "en"
	      params[text].reverse # just for now ;)
	    when "pl"
	      # The check_* helpers and is_vowel read these instance variables.
	      @russianVowels = RUSSIAN_VOWELS
	      @t = POLISH_RULES
	      # Words are split on whitespace and re-joined with single spaces.
	      params[text].split(" ").map { |word| transliterate_word(word) }.join(" ")
	    end
	  rescue StandardError => e
	    # StandardError only: signals and SystemExit must not be swallowed.
	    @errors << e.message
	    nil
	  end

	  # Transliterates one whitespace-free word, character by character.
	  def transliterate_word(word)
	    chars = word.mb_chars
	    last = chars.length - 1
	    (0..last).map do |x|
	      # Neighbours are nil at the word edges. The explicit bounds checks
	      # matter: index -1 would otherwise wrap around to the last character.
	      prev = x > 0 ? chars[x - 1].to_s : nil
	      cur = chars[x].to_s
	      nxt = x < last ? chars[x + 1].to_s : nil
	      transliterate_char(prev, cur, nxt)
	    end.join
	  end

	  # Picks the rule set to apply to +cur+ based on whether its neighbours
	  # are letters known to @t. Characters without an entry in @t are
	  # returned unchanged.
	  def transliterate_char(prev, cur, nxt)
	    pl = lookup_key(prev)
	    cl = lookup_key(cur)
	    nl = lookup_key(nxt)

	    rule = @t[cl]
	    return cur if rule.nil?

	    starts_word = @t[pl].nil? # no translatable letter before
	    ends_word = @t[nl].nil?   # no translatable letter after

	    if starts_word && ends_word
	      proper_case(cur, cl, rule["replace_with"]) # one-letter word
	    elsif starts_word
	      check_before(cur, nxt, cl, nl)
	    elsif ends_word
	      check_after(prev, cur, pl, cl)
	    else
	      check_all(prev, cur, nxt, pl, cl, nl)
	    end
	  end

	  # Normalized, lower-cased form of +char+ used as a key into @t;
	  # nil stays nil so that edge-of-word neighbours simply miss the table.
	  def lookup_key(char)
	    char && Unicode::downcase(Unicode::normalize_KC(char))
	  end

	  # Replacement for a word-final letter.
	  # p/c are the previous/current characters, pl/cl their lookup keys.
	  # Precedence: "ends_with", "after_vowels", "after", "replace_with".
	  def check_after(p, c, pl, cl)
	    rule = @t[cl]
	    trans =
	      if !rule["ends_with"].nil?
	        rule["ends_with"]
	      elsif !rule["after_vowels"].nil? && is_vowel(pl)
	        rule["after_vowels"]
	      elsif !rule["after"].nil? && !rule["after"][pl].nil?
	        rule["after"][pl]
	      else
	        rule["replace_with"]
	      end
	    proper_case(c, cl, trans)
	  end

	  # Replacement for a word-initial letter.
	  # c/n are the current/next characters, cl/nl their lookup keys.
	  # Precedence: "begins_with", "before_vowels", "before", "replace_with".
	  def check_before(c, n, cl, nl)
	    rule = @t[cl]
	    trans =
	      if !rule["begins_with"].nil?
	        rule["begins_with"]
	      elsif !rule["before_vowels"].nil? && is_vowel(nl)
	        rule["before_vowels"]
	      elsif !rule["before"].nil? && !rule["before"][nl].nil?
	        rule["before"][nl]
	      else
	        rule["replace_with"]
	      end
	    proper_case(c, cl, trans)
	  end

	  # Replacement for a letter in the middle of a word.
	  # Precedence: "after_vowels", "after", "before_vowels", "before",
	  # "replace_with".
	  def check_all(p, c, n, pl, cl, nl)
	    rule = @t[cl]
	    trans =
	      if !rule["after_vowels"].nil? && is_vowel(pl)
	        rule["after_vowels"]
	      elsif !rule["after"].nil? && !rule["after"][pl].nil?
	        rule["after"][pl]
	      elsif !rule["before_vowels"].nil? && is_vowel(nl)
	        rule["before_vowels"]
	      elsif !rule["before"].nil? && !rule["before"][nl].nil?
	        rule["before"][nl]
	      else
	        rule["replace_with"]
	      end
	    proper_case(c, cl, trans)
	  end

	  # True when +sign+ (a lower-cased character, or nil) is listed in
	  # @russianVowels.
	  def is_vowel(sign)
	    @russianVowels.include?(sign)
	  end

	  # Returns +trans+ as is when the original character was already lower
	  # case (orig == downcase); otherwise returns it capitalized.
	  def proper_case(orig, downcase, trans)
	    if orig == downcase
	      trans
	    else
	      Unicode::capitalize(Unicode::normalize_KC(trans))
	    end
	  end
	end