Skip to content

Instantly share code, notes, and snippets.

@khacanh
Last active August 29, 2015 14:00
Show Gist options
  • Save khacanh/11136458 to your computer and use it in GitHub Desktop.
Save khacanh/11136458 to your computer and use it in GitHub Desktop.
# Rewrites Myanmar-block text (U+1000..U+109F) through an ordered list of
# substitutions.
#
# NOTE(review): judging by the module name and the code points involved this
# looks like a Zawgyi-One -> Unicode converter; confirm against the callers.
module Z1Uni
  # Apply every substitution rule, in order, to a copy of +input+.
  #
  # input - the String to convert. It is never modified, and a frozen string
  #         is accepted.
  #
  # Returns a new UTF-8 String.
  def self.convert(input)
    # dup, not clone: clone preserves the frozen flag, which would make
    # force_encoding and every gsub! below raise on frozen input.
    output = input.dup.force_encoding("UTF-8")

    tall_aa = "\u102B"
    vi      = "\u102D"
    u       = "\u102F"
    uu      = "\u1030"
    ans     = "\u1036"
    db      = "\u1037"
    asat    = "\u103A"
    ya      = "\u103B"
    ra      = "\u103C"
    wa      = "\u103D"
    ha      = "\u103E"

    # --- Single letters ------------------------------------------------------
    # U+106A maps to U+1009 only; the earlier replacement text carried a stray
    # leading space that ended up in the output.
    output.gsub!(/\u106A/, "\u1009")
    output.gsub!(/\u1025(?=[\u1039\u102C])/, "\u1009")
    output.gsub!(/\u1025\u102E/, "\u1026")
    output.gsub!(/\u106B/, "\u100A")
    output.gsub!(/\u1090/, "\u101B")
    output.gsub!(/\u108F/, "\u1014")

    # --- Medials -------------------------------------------------------------
    # Each rule's target code point is the source of the rule before it, so
    # the rules must run from the highest code point downwards.
    output.gsub!(/[\u103D\u1087]/, ha)
    output.gsub!(/\u103C/, wa)
    output.gsub!(/[\u103B\u107E\u107F\u1080\u1081\u1082\u1083\u1084]/, ra)
    output.gsub!(/[\u103A\u107D]/, ya)
    output.gsub!(/\u103E\u103B/, "#{ya}#{ha}")
    output.gsub!(/\u108A/, "#{wa}#{ha}")
    output.gsub!(/\u103E\u103D/, "#{wa}#{ha}")

    # --- U+1064 and its combined forms ---------------------------------------
    # Move the mark in front of the (optional U+1031, optional U+103C,
    # consonant) cluster it follows; the combined forms U+108B/108C/108D also
    # leave a vowel sign behind the cluster. Unmatched optional groups
    # interpolate as "".
    output.gsub!(/(\u1031)?(\u103C)?([\u1000-\u1021])\u1064/) { "\u1064#{$1}#{$2}#{$3}" }
    output.gsub!(/(\u1031)?(\u103C)?([\u1000-\u1021])\u108B/) { "\u1064#{$1}#{$2}#{$3}\u102D" }
    output.gsub!(/(\u1031)?(\u103C)?([\u1000-\u1021])\u108C/) { "\u1064#{$1}#{$2}#{$3}\u102E" }
    output.gsub!(/(\u1031)?(\u103C)?([\u1000-\u1021])\u108D/) { "\u1064#{$1}#{$2}#{$3}\u1036" }

    # --- Vowel signs and other marks -----------------------------------------
    output.gsub!(/\u105A/, "#{tall_aa}#{asat}")
    output.gsub!(/\u108E/, "#{vi}#{ans}")
    output.gsub!(/\u1033/, u)
    output.gsub!(/\u1034/, uu)
    output.gsub!(/\u1088/, "#{ha}#{u}")
    output.gsub!(/\u1089/, "#{ha}#{uu}")
    # Must run after the U+103A rule above (which consumes the old U+103A) and
    # before the stacked-consonant rules below (which emit new U+1039).
    output.gsub!(/\u1039/, "\u103A")
    output.gsub!(/[\u1094\u1095]/, db)

    # Move the vowel signs that sit between a consonant and a stacked-consonant
    # glyph to after the stacked glyph. The quantifier is inside the capture
    # group so that both signs are kept when two are present.
    output.gsub!(/([\u1000-\u1021])([\u102C\u102D\u102E\u1032\u1036]{1,2})([\u1060\u1061\u1062\u1063\u1065\u1066\u1067\u1068\u1069\u1070\u1071\u1072\u1073\u1074\u1075\u1076\u1077\u1078\u1079\u107A\u107B\u107C\u1085])/) { "#{$1}#{$3}#{$2}" }

    # --- Expand single glyphs into sequences ---------------------------------
    output.gsub!(/\u1064/, "\u1004\u103A\u1039")
    output.gsub!(/\u104E/, "\u104E\u1004\u103A\u1038")
    output.gsub!(/\u1086/, "\u103F")
    output.gsub!(/\u1060/, "\u1039\u1000")
    output.gsub!(/\u1061/, "\u1039\u1001")
    output.gsub!(/\u1062/, "\u1039\u1002")
    output.gsub!(/\u1063/, "\u1039\u1003")
    output.gsub!(/\u1065/, "\u1039\u1005")
    output.gsub!(/[\u1066\u1067]/, "\u1039\u1006")
    output.gsub!(/\u1068/, "\u1039\u1007")
    output.gsub!(/\u1069/, "\u1039\u1008")
    output.gsub!(/\u106C/, "\u1039\u100B")
    output.gsub!(/\u1070/, "\u1039\u100F")
    output.gsub!(/[\u1071\u1072]/, "\u1039\u1010")
    output.gsub!(/[\u1073\u1074]/, "\u1039\u1011")
    output.gsub!(/\u1075/, "\u1039\u1012")
    output.gsub!(/\u1076/, "\u1039\u1013")
    output.gsub!(/\u1077/, "\u1039\u1014")
    output.gsub!(/\u1078/, "\u1039\u1015")
    output.gsub!(/\u1079/, "\u1039\u1016")
    output.gsub!(/\u107A/, "\u1039\u1017")
    output.gsub!(/\u107B/, "\u1039\u1018")
    output.gsub!(/\u107C/, "\u1039\u1019")
    output.gsub!(/\u1085/, "\u1039\u101C")
    output.gsub!(/\u106D/, "\u1039\u100C")
    output.gsub!(/\u1091/, "\u100F\u1039\u100D")
    output.gsub!(/\u1092/, "\u100B\u1039\u100C")
    output.gsub!(/\u1097/, "\u100B\u1039\u100B")
    output.gsub!(/\u106F/, "\u100E\u1039\u100D")
    output.gsub!(/\u106E/, "\u100D\u1039\u100D")

    # --- Medial reordering ---------------------------------------------------
    # U+103C written before its consonant moves behind the consonant (and
    # behind any U+1039 + consonant pair that follows it).
    output.gsub!(/(\u103C)([\u1000-\u1021])(\u1039[\u1000-\u1021])?/) { "#{$2}#{$3}#{$1}" }
    output.gsub!(/(\u103E)(\u103D)([\u103B\u103C])/) { "#{$3}#{$2}#{$1}" }
    output.gsub!(/(\u103E)([\u103B\u103C])/) { "#{$2}#{$1}" }
    # Block form is required here: with a plain string argument "#{$2}#{$1}"
    # is interpolated before gsub! runs, i.e. from the previous statement's
    # match, which deleted or corrupted every matched pair.
    output.gsub!(/(\u103D)([\u103B\u103C])/) { "#{$2}#{$1}" }

    # --- Digits that stand in for look-alike letters -------------------------
    # U+1040 directly after a letter or sign becomes U+101D. The second pass
    # also catches a U+1040 that follows a U+101D produced by the first pass.
    output.gsub!(/([\u1000-\u101C\u101E-\u102A\u102C\u102E-\u103F\u104C-\u109F])\u1040/) { "#{$1}\u101D" }
    output.gsub!(/(\u101D)\u1040/) { "#{$1}\u101D" }
    # U+1047 next to a letter, sign or space (on either side) becomes U+101B.
    output.gsub!(/([\u1000-\u101C\u101E-\u102A\u102C\u102E-\u103F\u104C-\u109F\u0020])\u1047/) { "#{$1}\u101B" }
    # A real lookahead: the earlier pattern had spaces inside "(?=" and inside
    # the character-class ranges, so it matched a literal " = " instead.
    output.gsub!(/\u1047(?=[\u1000-\u101C\u101E-\u102A\u102C\u102E-\u103F\u104C-\u109F\u0020])/, "\u101B")

    # --- Final ordering inside each consonant cluster ------------------------
    # Captures: 1 = leading U+1031, 2 = consonant, 3 = U+1039 + consonant,
    # 4/9 = U+102D/102E/1032, 5/8 = U+1036..1038 runs, 6 = medials,
    # 7 = U+102F/1030. Emitted as 2 3 6 1 4 9 7 5 8.
    output.gsub!(/(\u1031)?([\u1000-\u1021])(\u1039[\u1000-\u1021])?([\u102D\u102E\u1032])?([\u1036\u1037\u1038]{0,2})([\u103B-\u103E]{0,3})([\u102F\u1030])?([\u1036\u1037\u1038]{0,2})([\u102D\u102E\u1032])?/) { "#{$2}#{$3}#{$6}#{$1}#{$4}#{$9}#{$7}#{$5}#{$8}" }
    output.gsub!("#{ans}#{u}", "#{u}#{ans}")
    output.gsub!(/(\u103A)(\u1037)/) { "#{$2}#{$1}" }

    output
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment