Created
December 27, 2012 12:12
-
-
Save baltuonis/4387854 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class String
  # Maps each ASCII replacement (the key) to the list of accented or special
  # characters (the value) that #latinize rewrites to that replacement.
  LATIN_MAP = {
    "A" => %w[À Á Â Ã Å Ą Ā],
    "a" => %w[à á â ã å ą ả ã ạ ă ắ ằ ẳ ẵ ặ â ấ ầ ẩ ẫ ậ ā],
    "AE" => %w[Ä Æ Ǽ],
    "ae" => %w[ä æ ǽ],
    "C" => %w[Ç Č Ć Ĉ],
    "c" => %w[ç č ć ĉ],
    "D" => %w[Ð],
    "d" => %w[đ],
    "E" => %w[È É Ê Ẽ Ę Ė Ē Ë],
    "e" => %w[è é ę ë ė ẻ ẽ ẹ ê ế ề ể ễ ệ ē],
    "G" => %w[Ģ],
    "g" => %w[ģ],
    "I" => %w[Ì Í Î Ï Ĩ Į Ī],
    "i" => %w[ì í î ï ĩ į ỉ ị ī],
    "K" => %w[Ķ],
    "k" => %w[ķ],
    "L" => %w[Ļ],
    "l" => %w[ļ],
    "N" => %w[Ñ Ń Ņ],
    "n" => %w[ñ ń ņ],
    "O" => %w[Ò Ó Ô Õ Ø],
    "o" => %w[ò ó õ ỏ õ ọ ô ố ồ ổ ỗ ộ ơ ớ ờ ở ỡ ợ ø],
    "OE" => %w[Ö Œ],
    "oe" => %w[ö œ],
    "R" => %w[Ŗ],
    "r" => %w[ŗ],
    "S" => %w[Š],
    "s" => %w[š],
    "ss" => %w[ß],
    "U" => %w[Ù Ú Ũ Ű Ů Ũ Ų Ū Û],
    "u" => %w[ų ū û ú ù ű ů ủ ũ ụ ư ứ ừ ử ữ ự],
    "UE" => %w[Ü],
    "ue" => %w[ü],
    "x" => %w[×],
    "Y" => %w[Ý Ÿ Ŷ],
    "y" => %w[ý ÿ ŷ ỳ ỷ ỹ ỵ],
    "Z" => %w[Ž],
    "z" => %w[ž]
  }.freeze

  # Reverse lookup derived from LATIN_MAP: accented character => ASCII
  # replacement. `||=` keeps the first mapping when a character is listed more
  # than once, which matches applying the LATIN_MAP entries in order.
  LATIN_REPLACEMENTS = LATIN_MAP.each_with_object({}) do |(ascii, accented), lookup|
    accented.each { |char| lookup[char] ||= ascii }
  end.freeze

  # Character class matching every character that has a replacement. Built once
  # from the de-duplicated lookup keys instead of on every #latinize call.
  LATIN_PATTERN = Regexp.new("[#{Regexp.escape(LATIN_REPLACEMENTS.keys.join)}]").freeze

  # Runs of characters that are neither ASCII alphanumerics nor one of the
  # punctuation marks . , | ? ! : ; " ' = + - _ (whitespace is part of such runs).
  NON_LATIN_PATTERN = /[^a-zA-Z0-9.,|?!:;"'=+\-_]+/u.freeze

  # Returns a copy of the string in which every character listed in LATIN_MAP
  # is replaced by its ASCII stand-in, and every remaining run of characters
  # outside NON_LATIN_PATTERN's allowed set is collapsed into a single space.
  # The receiver is not modified.
  #
  # @return [String] the latinized copy
  def latinize
    result = dup
    # One pass with a hash replacement; the replacements are plain ASCII and
    # can never match LATIN_PATTERN again, so this equals the per-entry passes.
    result.gsub!(LATIN_PATTERN, LATIN_REPLACEMENTS)
    # Whitespace is in the negated class, so each run already ends up as exactly
    # one space and no separate whitespace-squeezing pass is needed.
    result.gsub!(NON_LATIN_PATTERN, " ")
    result
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment