choonkeat · July 28, 2010 04:33
diff --git a/gruber.rb b/gruber.rb
 # Namespace for John Gruber's liberal URL-matching pattern.
 module Gruber
  # http://daringfireball.net/2010/07/improved_regex_for_matching_urls
  #
  # Finds URL-like substrings in free text. The leading inline `(?xi)` group
  # turns on extended mode (pattern whitespace and `#` comments are ignored)
  # and case-insensitive matching for the rest of the pattern.
  #
  # Besides capture 1 (the whole URL), the balanced-paren sub-patterns use
  # capturing groups too, so `String#scan` yields one array per match and the
  # URL is that array's first element.
  URL_REGEXP = %r{(?xi)
  \b
  (                           # Capture 1: entire matched URL
    (?:
      [a-z][\w-]+:                # URL protocol and colon
      (?:
        /{1,3}                        # 1-3 slashes
        |                             #   or
        [a-z0-9%]                     # Single letter or digit or '%'
                                      # (Trying not to match e.g. "URI::Escape")
      )
      |                           #   or
      www\d{0,3}[.]               # "www.", "www1.", "www2." … "www999."
      |                           #   or
      [a-z0-9.\-]+[.][a-z]{2,4}/  # looks like domain name followed by a slash
    )
    (?:                           # One or more:
      [^\s()<>]+                      # Run of non-space, non-()<>
      |                               #   or
      \(([^\s()<>]+|(\([^\s()<>]+\)))*\)  # balanced parens, up to 2 levels
    )+
    (?:                           # End with:
      \(([^\s()<>]+|(\([^\s()<>]+\)))*\)  # balanced parens, up to 2 levels
      |                                   #   or
      [^\s`!()\[\]{};:'".,<>?«»“”‘’]        # not a space or one of these punct chars
    )
  )}
 end

 # ruby gruber.rb
 #
 # Self-test: when this file is executed directly, fetch the sample text from
 # daringfireball.net and print every URL the pattern finds, numbered from 1.
 if $0 == __FILE__
  require 'open-uri'
  # open-uri stopped hooking Kernel#open in Ruby 3.0, where a bare `open(url)`
  # looks for a local file and raises Errno::ENOENT. URI.open is the explicit
  # entry point.
  testdata = URI.open('http://daringfireball.net/misc/2010/07/url-matching-regex-test-data.text') {|f| f.read }
  testdata.scan(Gruber::URL_REGEXP).each_with_index do |m,i|
    # m holds every capture group of one match; the first is the complete URL.
    puts "#{i+1}. #{m.first}"
  end
 end
	# Namespace for John Gruber's liberal URL-matching pattern.
	module Gruber
	  # http://daringfireball.net/2010/07/improved_regex_for_matching_urls
	  #
	  # Finds URL-like substrings in free text. The leading inline `(?xi)` group
	  # turns on extended mode (pattern whitespace and `#` comments are ignored)
	  # and case-insensitive matching for the rest of the pattern.
	  #
	  # The alternation operators must be bare `|`; written as `\|` they match a
	  # literal pipe character and the pattern can no longer match real URLs.
	  #
	  # Besides capture 1 (the whole URL), the balanced-paren sub-patterns use
	  # capturing groups too, so `String#scan` yields one array per match and the
	  # URL is that array's first element.
	  URL_REGEXP = %r{(?xi)
	  \b
	  (                           # Capture 1: entire matched URL
	    (?:
	      [a-z][\w-]+:                # URL protocol and colon
	      (?:
	        /{1,3}                        # 1-3 slashes
	        |                             #   or
	        [a-z0-9%]                     # Single letter or digit or '%'
	                                      # (Trying not to match e.g. "URI::Escape")
	      )
	      |                           #   or
	      www\d{0,3}[.]               # "www.", "www1.", "www2." … "www999."
	      |                           #   or
	      [a-z0-9.\-]+[.][a-z]{2,4}/  # looks like domain name followed by a slash
	    )
	    (?:                           # One or more:
	      [^\s()<>]+                      # Run of non-space, non-()<>
	      |                               #   or
	      \(([^\s()<>]+|(\([^\s()<>]+\)))*\)  # balanced parens, up to 2 levels
	    )+
	    (?:                           # End with:
	      \(([^\s()<>]+|(\([^\s()<>]+\)))*\)  # balanced parens, up to 2 levels
	      |                                   #   or
	      [^\s`!()\[\]{};:'".,<>?«»“”‘’]        # not a space or one of these punct chars
	    )
	  )}
	end

	# ruby gruber.rb
	#
	# Self-test: when this file is executed directly, fetch the sample text from
	# daringfireball.net and print every URL the pattern finds, numbered from 1.
	if $0 == __FILE__
	  require 'open-uri'
	  # Block parameters need bare pipes; the escaped `\|f\|` form does not parse.
	  # open-uri stopped hooking Kernel#open in Ruby 3.0, where a bare `open(url)`
	  # looks for a local file and raises Errno::ENOENT. URI.open is the explicit
	  # entry point.
	  testdata = URI.open('http://daringfireball.net/misc/2010/07/url-matching-regex-test-data.text') {|f| f.read }
	  testdata.scan(Gruber::URL_REGEXP).each_with_index do |m,i|
	    # m holds every capture group of one match; the first is the complete URL.
	    puts "#{i+1}. #{m.first}"
	  end
	end
No results found