Created
March 3, 2016 04:53
-
-
Save AquisTech/6d5617ec381dc59f5f7e to your computer and use it in GitHub Desktop.
Regular expressions with Unicode support
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# /lib/unicode_regexp.rb
# Unicode-aware regular expressions for validating free-text input.
#
# Each class method returns a precompiled Regexp that is anchored to the
# whole string with \A / \z, so a value such as "ok\n<script>" cannot pass
# validation just because one of its lines is valid.
class UnicodeRegexp
  # Regex-escaped special characters (punctuation, backslash and space),
  # ready to be interpolated inside a character class.
  SPECIAL_CHARACTERS = [
    '\\~', '\\!', '\\@', '\\#', '\\$', '\\%', '\\^', '\\&', '\\*', '\\(', '\\)', '\\_', '\\+', '\\-',
    '\\=', '\\|', '\\{', '\\}', '\\[', '\\]', '\\:', '\\;', '\\"', '\\<', '\\>', '\\.', '\\?', '\\/',
    "\\\\\s" # Backslash & space character are purposely kept together in a string
  ].join.freeze

  # UTF-8 string made of every decimal digit listed below.
  # The list was taken from http://www.fileformat.info/info/unicode/category/Nd/list.htm
  #
  # NOTE(review): this is a hand-maintained snapshot and is not exhaustive
  # (e.g. MYANMAR DIGIT U+1040..U+1049 is absent). `\p{Nd}` is the
  # engine-maintained equivalent; switching would widen accepted input, so
  # confirm with the owners before doing so.
  DIGITS = [
    0x0030..0x0039,   # DIGIT ZERO to NINE
    0x0660..0x0669,   # ARABIC-INDIC DIGIT ZERO to NINE
    0x06F0..0x06F9,   # EXTENDED ARABIC-INDIC DIGIT ZERO to NINE
    0x07C0..0x07C9,   # NKO DIGIT ZERO to NINE
    0x0966..0x096F,   # DEVANAGARI DIGIT ZERO to NINE
    0x09E6..0x09EF,   # BENGALI DIGIT ZERO to NINE
    0x0A66..0x0A6F,   # GURMUKHI DIGIT ZERO to NINE
    0x0AE6..0x0AEF,   # GUJARATI DIGIT ZERO to NINE
    0x0B66..0x0B6F,   # ORIYA DIGIT ZERO to NINE
    0x0BE6..0x0BEF,   # TAMIL DIGIT ZERO to NINE
    0x0C66..0x0C6F,   # TELUGU DIGIT ZERO to NINE
    0x0CE6..0x0CEF,   # KANNADA DIGIT ZERO to NINE
    0x0D66..0x0D6F,   # MALAYALAM DIGIT ZERO to NINE
    0x0E50..0x0E59,   # THAI DIGIT ZERO to NINE
    0x0ED0..0x0ED9,   # LAO DIGIT ZERO to NINE
    0x0F20..0x0F29,   # TIBETAN DIGIT ZERO to NINE
    0x1090..0x1099,   # MYANMAR SHAN DIGIT ZERO to NINE
    0x17E0..0x17E9,   # KHMER DIGIT ZERO to NINE
    0x1810..0x1819,   # MONGOLIAN DIGIT ZERO to NINE
    0x1946..0x194F,   # LIMBU DIGIT ZERO to NINE
    0x19D0..0x19D9,   # NEW TAI LUE DIGIT ZERO to NINE
    # Tai Tham has two separate digit sets; U+1A8A..U+1A8F between them are
    # unassigned and must not be treated as digits.
    0x1A80..0x1A89,   # TAI THAM HORA DIGIT ZERO to NINE
    0x1A90..0x1A99,   # TAI THAM THAM DIGIT ZERO to NINE
    0x1B50..0x1B59,   # BALINESE DIGIT ZERO to NINE
    0x1BB0..0x1BB9,   # SUNDANESE DIGIT ZERO to NINE
    0x1C40..0x1C49,   # LEPCHA DIGIT ZERO to NINE
    0x1C50..0x1C59,   # OL CHIKI DIGIT ZERO to NINE
    0xA620..0xA629,   # VAI DIGIT ZERO to NINE
    0xA8D0..0xA8D9,   # SAURASHTRA DIGIT ZERO to NINE
    0xA900..0xA909,   # KAYAH LI DIGIT ZERO to NINE
    0xA9D0..0xA9D9,   # JAVANESE DIGIT ZERO to NINE
    0xAA50..0xAA59,   # CHAM DIGIT ZERO to NINE
    0xABF0..0xABF9,   # MEETEI MAYEK DIGIT ZERO to NINE
    0xFF10..0xFF19,   # FULLWIDTH DIGIT ZERO to NINE
    0x104A0..0x104A9, # OSMANYA DIGIT ZERO to NINE
    0x11066..0x1106F, # BRAHMI DIGIT ZERO to NINE
    0x110F0..0x110F9, # SORA SOMPENG DIGIT ZERO to NINE
    0x11136..0x1113F, # CHAKMA DIGIT ZERO to NINE
    0x111D0..0x111D9, # SHARADA DIGIT ZERO to NINE
    0x116C0..0x116C9, # TAKRI DIGIT ZERO to NINE
    0x1D7CE..0x1D7D7, # MATHEMATICAL BOLD DIGIT ZERO to NINE
    0x1D7D8..0x1D7E1, # MATHEMATICAL DOUBLE-STRUCK DIGIT ZERO to NINE
    0x1D7E2..0x1D7EB, # MATHEMATICAL SANS-SERIF DIGIT ZERO to NINE
    0x1D7EC..0x1D7F5, # MATHEMATICAL SANS-SERIF BOLD DIGIT ZERO to NINE
    0x1D7F6..0x1D7FF  # MATHEMATICAL MONOSPACE DIGIT ZERO to NINE
  ].flat_map(&:to_a).pack('U*').freeze
  # pack('U*') is used instead of Integer#chr because chr without an explicit
  # encoding raises RangeError for code points above 255 unless
  # Encoding.default_internal happens to be set.

  # Character-class fragments shared by the patterns below.
  non_special = "[^#{SPECIAL_CHARACTERS}]"
  letter      = "[^#{SPECIAL_CHARACTERS}#{DIGITS}]"

  # Runs of allowed characters separated by single spaces, with an optional
  # trailing space. Accepts the same strings as `(run+\s{0,1})+` but without
  # nested quantifiers: the only whitespace character excluded from the
  # negated class is the space, so it is the only separator needed.
  ONLY_CHARACTERS        = /\A#{letter}+(?: #{letter}+)* ?\z/
  ONLY_DIGITS            = /\A[#{DIGITS}]+\z/
  CHARACTERS_AND_NUMBERS = /\A#{non_special}+(?: #{non_special}+)* ?\z/
  # NOTE(review): this requires the FIRST character to be non-special, which
  # is what the previous pattern accepted. If the intent is "contains at
  # least one non-special character anywhere", this should be relaxed.
  EXCLUDE_ONLY_SPECIAL_CHARACTERS = /\A#{non_special}.*\z/m

  private_constant :ONLY_CHARACTERS, :ONLY_DIGITS, :CHARACTERS_AND_NUMBERS,
                   :EXCLUDE_ONLY_SPECIAL_CHARACTERS

  class << self
    # Excludes all special characters & digits.
    #
    # @return [Regexp] matches text with no special characters or digits,
    #   where spaces may only appear singly and never at the start
    def only_characters
      ONLY_CHARACTERS
    end

    # Includes only digits.
    #
    # @return [Regexp] matches one or more characters from DIGITS
    def only_digits
      ONLY_DIGITS
    end

    # Excludes all special characters (digits are allowed).
    #
    # @return [Regexp] matches text with no special characters, where spaces
    #   may only appear singly and never at the start
    def characters_and_numbers
      CHARACTERS_AND_NUMBERS
    end

    # Rejects strings that begin with a special character (and therefore any
    # string made only of special characters).
    #
    # @return [Regexp] matches any non-empty string whose first character is
    #   not a special character
    def exclude_only_special_characters
      EXCLUDE_ONLY_SPECIAL_CHARACTERS
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# /app/models/user.rb
# Example model: the mobile attribute is valid only when it matches the
# pattern returned by UnicodeRegexp.only_digits.
class User < ActiveRecord::Base
  validates :mobile, format: { with: UnicodeRegexp.only_digits }
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment