Last active
August 29, 2015 14:00
-
-
Save khacanh/11136458 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Rewrites Myanmar text from a legacy font encoding into standard Unicode
# code points and storage order.
#
# NOTE(review): the module name and the code points handled (U+1060..U+1097
# mapped to stacked consonants, medials renumbered) suggest the source
# encoding is Zawgyi-One -- confirm against callers.
module Z1Uni
  # Ordered [pattern, replacement] pairs. Each rule is applied to the whole
  # text before the next one runs, and later rules depend on the output of
  # earlier ones, so the order must not be changed. Replacements use
  # String#gsub back-references (\1, \2, ...), which expand to an empty string
  # when an optional group did not take part in the match.
  RULES = [
    # Legacy letter variants -> standard letters.
    [/\u106A/, " \u1009"],
    [/\u1025(?=[\u1039\u102C])/, "\u1009"],
    [/\u1025\u102E/, "\u1026"],
    [/\u106B/, "\u100A"],
    [/\u1090/, "\u101B"],
    [/\u108F/, "\u1014"],

    # Medials. The legacy code points sit one position away from the Unicode
    # ones, so they are shifted from the top down (U+103D first, U+103A last)
    # to keep an already-converted sign from being converted a second time.
    [/[\u103D\u1087]/, "\u103E"],
    [/\u103C/, "\u103D"],
    [/[\u103B\u107E-\u1084]/, "\u103C"],
    [/[\u103A\u107D]/, "\u103B"],
    [/\u103E\u103B/, "\u103B\u103E"],
    [/\u108A/, "\u103D\u103E"],
    [/\u103E\u103D/, "\u103D\u103E"],

    # U+1064 and its combined forms U+108B..U+108D follow the consonant in the
    # input; move a U+1064 marker in front of the optional U+1031 / U+103C
    # prefix and the consonant, and spell the combined vowel sign out after it.
    [/(\u1031)?(\u103C)?([\u1000-\u1021])\u1064/, "\u1064\\1\\2\\3"],
    [/(\u1031)?(\u103C)?([\u1000-\u1021])\u108B/, "\u1064\\1\\2\\3\u102D"],
    [/(\u1031)?(\u103C)?([\u1000-\u1021])\u108C/, "\u1064\\1\\2\\3\u102E"],
    [/(\u1031)?(\u103C)?([\u1000-\u1021])\u108D/, "\u1064\\1\\2\\3\u1036"],

    # Legacy vowel signs, asat and dot-below variants.
    [/\u105A/, "\u102B\u103A"],
    [/\u108E/, "\u102D\u1036"],
    [/\u1033/, "\u102F"],
    [/\u1034/, "\u1030"],
    [/\u1088/, "\u103E\u102F"],
    [/\u1089/, "\u103E\u1030"],
    [/\u1039/, "\u103A"],
    [/[\u1094\u1095]/, "\u1037"],

    # Move a legacy stacked-consonant glyph in front of the one or two signs
    # that precede it. The {1,2} quantifier is inside the capture group so
    # that both signs are kept when two are present.
    [/([\u1000-\u1021])([\u102C\u102D\u102E\u1032\u1036]{1,2})([\u1060-\u1063\u1065-\u1069\u1070-\u107C\u1085])/,
     '\1\3\2'],

    # Expand the U+1064 marker and the remaining legacy glyphs.
    [/\u1064/, "\u1004\u103A\u1039"],
    [/\u104E/, "\u104E\u1004\u103A\u1038"],
    [/\u1086/, "\u103F"],

    # Legacy stacked-consonant glyphs -> U+1039 followed by the consonant.
    [/\u1060/, "\u1039\u1000"],
    [/\u1061/, "\u1039\u1001"],
    [/\u1062/, "\u1039\u1002"],
    [/\u1063/, "\u1039\u1003"],
    [/\u1065/, "\u1039\u1005"],
    [/[\u1066\u1067]/, "\u1039\u1006"],
    [/\u1068/, "\u1039\u1007"],
    [/\u1069/, "\u1039\u1008"],
    [/\u106C/, "\u1039\u100B"],
    [/\u1070/, "\u1039\u100F"],
    [/[\u1071\u1072]/, "\u1039\u1010"],
    [/[\u1073\u1074]/, "\u1039\u1011"],
    [/\u1075/, "\u1039\u1012"],
    [/\u1076/, "\u1039\u1013"],
    [/\u1077/, "\u1039\u1014"],
    [/\u1078/, "\u1039\u1015"],
    [/\u1079/, "\u1039\u1016"],
    [/\u107A/, "\u1039\u1017"],
    [/\u107B/, "\u1039\u1018"],
    [/\u107C/, "\u1039\u1019"],
    [/\u1085/, "\u1039\u101C"],
    [/\u106D/, "\u1039\u100C"],
    [/\u1091/, "\u100F\u1039\u100D"],
    [/\u1092/, "\u100B\u1039\u100C"],
    [/\u1097/, "\u100B\u1039\u100B"],
    [/\u106F/, "\u100E\u1039\u100D"],
    [/\u106E/, "\u100D\u1039\u100D"],

    # U+103C precedes its consonant in the input; move it behind the
    # consonant and any stacked consonant.
    [/(\u103C)([\u1000-\u1021])(\u1039[\u1000-\u1021])?/, '\2\3\1'],

    # Sort runs of medials into ascending code point order.
    [/(\u103E)(\u103D)([\u103B\u103C])/, '\3\2\1'],
    [/(\u103E)([\u103B\u103C])/, '\2\1'],
    [/(\u103D)([\u103B\u103C])/, '\2\1'],

    # Heuristics: digit zero (U+1040) directly after a letter is rewritten to
    # the letter U+101D, and digit seven (U+1047) next to a letter or a space
    # is rewritten to the letter U+101B.
    # NOTE(review): presumably because the digits are typed in place of the
    # look-alike letters; this also rewrites a genuine digit seven that is
    # adjacent to a space -- confirm this is acceptable for callers.
    [/([\u1000-\u101C\u101E-\u102A\u102C\u102E-\u103F\u104C-\u109F])\u1040/, "\\1\u101D"],
    [/\u101D\u1040/, "\u101D\u101D"],
    [/([\u1000-\u101C\u101E-\u102A\u102C\u102E-\u103F\u104C-\u109F\u0020])\u1047/, "\\1\u101B"],
    [/\u1047(?=[\u1000-\u101C\u101E-\u102A\u102C\u102E-\u103F\u104C-\u109F\u0020])/, "\u101B"],

    # Reorder every syllable cluster to: consonant, stacked consonant,
    # medials, U+1031, upper vowel signs, lower vowel sign, trailing marks.
    [/
      (\u1031)?                    # 1: U+1031, placed before the consonant in the input
      ([\u1000-\u1021])            # 2: base consonant
      (\u1039[\u1000-\u1021])?     # 3: stacked consonant
      ([\u102D\u102E\u1032])?      # 4: upper vowel sign
      ([\u1036\u1037\u1038]{0,2})  # 5: marks U+1036, U+1037, U+1038
      ([\u103B-\u103E]{0,3})       # 6: medials
      ([\u102F\u1030])?            # 7: lower vowel sign
      ([\u1036\u1037\u1038]{0,2})  # 8: marks U+1036, U+1037, U+1038
      ([\u102D\u102E\u1032])?      # 9: upper vowel sign
    /x, '\2\3\6\1\4\9\7\5\8'],

    # Final pairwise ordering fixes.
    [/\u1036\u102F/, "\u102F\u1036"],
    [/\u103A\u1037/, "\u1037\u103A"]
  ].freeze
  private_constant :RULES

  # Converts +input+ by applying every entry of RULES in order.
  #
  # @param input [String] text to convert; its bytes are reinterpreted as
  #   UTF-8 (no transcoding is performed)
  # @return [String] a new UTF-8 string; +input+ itself is never modified
  def self.convert(input)
    # dup rather than clone: clone keeps the frozen flag, which would make
    # force_encoding and gsub! raise on frozen input.
    working_copy = input.dup.force_encoding("UTF-8")
    RULES.each_with_object(working_copy) do |(pattern, replacement), text|
      text.gsub!(pattern, replacement)
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment