-
-
Save foucist/4736126 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Regular Expression for URL validation | |
# | |
# Converted Diego Perini's regexp to ruby | |
# | |
# Author: James Robey | |
# Updated: 2013/02/07 | |
# | |
# Credits to Matt (mparodi) | |
# | |
# | |
# The regular expression, composed & commented,
# could easily be tweaked for RFC compliance;
# it was expressly modified to fit & satisfy
# these tests for a URL shortener:
# | |
# http://mathiasbynens.be/demo/url-regex | |
# | |
# Notes on possible differences from a standard/generic validation: | |
# | |
# - utf-8 char class takes into consideration the full Unicode range
# - TLDs have been made mandatory, so single names like "localhost" fail
# - protocols have been restricted to ftp, http and https only as requested | |
# | |
# Changes: | |
# | |
# - IP address dotted notation validation, range: 1.0.0.0 - 223.255.255.255
# first and last IP addresses of each class are considered invalid
# (since they are broadcast/network addresses)
# | |
# - Added exclusion of private, reserved and/or local networks ranges | |
# | |
# Compressed one-line versions: | |
# | |
# Javascript version | |
# | |
# /^(?:(?:https?|ftp):\/\/)(?:\S+(?::\S*)?@)?(?:(?!10(?:\.\d{1,3}){3})(?!127(?:\.\d{1,3}){3})(?!169\.254(?:\.\d{1,3}){2})(?!192\.168(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,})))(?::\d{2,5})?(?:\/[^\s]*)?$/i | |
# | |
# PHP version | |
# | |
# _^(?:(?:https?|ftp)://)(?:\S+(?::\S*)?@)?(?:(?!10(?:\.\d{1,3}){3})(?!127(?:\.\d{1,3}){3})(?!169\.254(?:\.\d{1,3}){2})(?!192\.168(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)(?:\.(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)*(?:\.(?:[a-z\x{00a1}-\x{ffff}]{2,})))(?::\d{2,5})?(?:/[^\s]*)?$_iuS | |
# | |
module URL
  # Case-insensitive pattern that matches a whole string (\A ... \z) holding
  # an http, https or ftp URL.
  #
  # The host part is either a dotted-quad IPv4 address outside the excluded
  # ranges listed below, or a dotted host name that ends in an alphabetic
  # TLD of at least two characters (so bare names such as "localhost" are
  # rejected).
  #
  # NOTE: inside this module the bare constant name `Regexp` refers to this
  # pattern, not to Ruby's core class; use `::Regexp` for the class.
  #
  # Usage:
  #   URL::Regexp =~ "http://example.com/path"  # => 0
  #   URL::Regexp =~ "http://localhost"         # => nil
  Regexp = begin
    # Protocol identifier (mandatory).
    protocol = '(?:(?:https?|ftp)://)'

    # Optional "user:pass@" authentication prefix.
    credentials = '(?:\S+(?::\S*)?@)?'

    # Negative lookaheads rejecting private & local IPv4 networks:
    # 10.x.x.x, 127.x.x.x, 169.254.x.x, 192.168.x.x and 172.16-31.x.x.
    private_networks = [
      '(?!10(?:\.\d{1,3}){3})',
      '(?!127(?:\.\d{1,3}){3})',
      '(?!169\.254(?:\.\d{1,3}){2})',
      '(?!192\.168(?:\.\d{1,3}){2})',
      '(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})'
    ].join

    # Dotted-quad octets:
    #   first octet  1-223 (rejects 0.x.x.x and reserved space >= 224.0.0.0)
    #   middle two   0-255
    #   last octet   1-254 (rejects network & broadcast addresses)
    ipv4_octets = [
      '(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])',
      '(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}',
      '(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))'
    ].join

    # Labels are runs of letters/digits (ASCII or U+00A1..U+FFFF) separated
    # by single hyphens; a label never ends with a hyphen.
    host_name   = '(?:(?:[a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)'
    domain_name = '(?:\.(?:[a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)*'

    # Mandatory TLD: two or more letters, no digits.
    tld = '(?:\.(?:[a-z\u00a1-\uffff]{2,}))'

    # Optional port (2 to 5 digits) and optional path/query/fragment.
    port_number   = '(?::\d{2,5})?'
    resource_path = '(?:/[^\s]*)?'

    # The regexp literal is the value of the begin block, so no throwaway
    # local is needed to hold it.
    /\A#{protocol}#{credentials}(?:#{private_networks}#{ipv4_octets}|#{host_name}#{domain_name}#{tld})#{port_number}#{resource_path}\z/i
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment