lengarvey · August 29, 2015 14:03
diff --git a/README.md b/README.md
diff --git a/fixed_rfc3986_parser.rb b/fixed_rfc3986_parser.rb
 require 'uri'

 module URI
   # Reopens URI::RFC3986_Parser so that #parse makes one extra attempt after
   # an InvalidURIError: the query component is percent-encoded with a naive,
   # non-validating escape and the result is parsed again.
   class RFC3986_Parser # :nodoc:
     # Non validating splitting regular expression for RFC3986
     RFC3986_URI_SPLIT = Regexp.new '\A(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?\z'
     # Characters that naive_escape percent-encodes inside the query.
     QUERY_RESERVED = /[\[\] \/!'()\*]/

     # Splits +uri+ into its five top-level components without validating it.
     #
     # Returns [scheme, authority, path, query, fragment]. A component that is
     # absent from +uri+ is nil; every element is nil when +uri+ does not
     # match RFC3986_URI_SPLIT at all.
     def non_validating_split(uri) #:nodoc:
       match = RFC3986_URI_SPLIT.match(uri)
       return Array.new(5) unless match

       # Groups 2/4/5/7/9 are the components without their delimiters.
       match.values_at(2, 4, 5, 7, 9)
     end

     # Percent-encodes every byte of +str+ (e.g. "[" becomes "%5B").
     def percent_encode(str) # :nodoc:
       str.each_byte.map { |byte| format('%%%02X', byte) }.join
     end

     # Parses +uri+ into a URI object of the class registered for its scheme,
     # falling back to Generic.
     #
     # When +retry_parse+ is true and the first attempt raises
     # URI::InvalidURIError, the query is escaped with naive_escape and the
     # parse is attempted exactly once more; a second failure is re-raised.
     def parse(uri, retry_parse = true) # :nodoc:
       scheme, userinfo, host, port,
         registry, path, opaque, query, fragment = split(uri)

       klass = (scheme && URI.scheme_list[scheme.upcase]) || Generic
       klass.new(scheme, userinfo, host, port,
                 registry, path, opaque, query,
                 fragment, self)
     rescue URI::InvalidURIError
       raise unless retry_parse

       parse(naive_escape(uri), false)
     end

     private

     # Will only attempt to escape the query string.
     #
     # Returns +uri+ unchanged when it has no query, so the retry in #parse
     # raises InvalidURIError again instead of failing on a nil query.
     # Delimiters are only re-emitted for components that were present, so a
     # missing scheme, authority or fragment does not gain a stray ":", "//"
     # or "#".
     def naive_escape(uri) # :nodoc:
       scheme, authority, path, query, fragment = non_validating_split(uri)
       return uri if query.nil?

       escaped_query = query.gsub(QUERY_RESERVED) { |char| percent_encode(char) }

       parts = []
       parts << "#{scheme}:" if scheme
       parts << "//#{authority}" if authority
       parts << path.to_s
       parts << "?#{escaped_query}"
       parts << "##{fragment}" if fragment
       parts.join
     end

   end # class Parser
 end # module URI
diff --git a/irb_output.rb b/irb_output.rb
 # This is irb showing the problem
 irb(main):001:0> Object::RUBY_VERSION
 => "2.2.0"
 irb(main):002:0> URI.parse "http://user:[email protected]/go/to/widgets?a[b]=1&test=hello world&x=/#hello"
 URI::InvalidURIError: bad URI(is not URI?): http://user:[email protected]/go/to/widgets?a[b]=1&test=hello world&x=/#hello
        from /Users/artega/.rubies/ruby-trunk/lib/ruby/2.2.0/uri/rfc3986_parser.rb:47:in `split'
        from /Users/artega/.rubies/ruby-trunk/lib/ruby/2.2.0/uri/rfc3986_parser.rb:53:in `parse'
        from /Users/artega/.rubies/ruby-trunk/lib/ruby/2.2.0/uri/common.rb:223:in `parse'
        from (irb):2
        from /Users/artega/.rubies/ruby-trunk/bin/irb:11:in `<main>'
 irb(main):003:0> require 'fixed_rfc3986_parser' # my monkey patch to URI::RFC3986_Parser
 => true
 irb(main):004:0> URI.parse "http://user:[email protected]/go/to/widgets?a[b]=1&test=hello world&x=/#hello"
 => #<URI::HTTP:0x007fb9a93bb860 URL:http://user:[email protected]/go/to/widgets?a%5Bb%5D=1&test=hello%20world&x=%2F#hello>
 irb(main):005:0>
	require 'uri'

	module URI
	  # Reopens URI::RFC3986_Parser so that #parse makes one extra attempt after
	  # an InvalidURIError: the query component is percent-encoded with a naive,
	  # non-validating escape and the result is parsed again.
	  class RFC3986_Parser # :nodoc:
	    # Non validating splitting regular expression for RFC3986.
	    # Each component group needs its `*` quantifier; without it a group
	    # matches a single character and real URIs never match.
	    RFC3986_URI_SPLIT = Regexp.new '\A(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?\z'
	    # Characters that naive_escape percent-encodes inside the query.
	    QUERY_RESERVED = /[\[\] \/!'()\*]/

	    # Splits +uri+ into its five top-level components without validating it.
	    #
	    # Returns [scheme, authority, path, query, fragment]. A component that is
	    # absent from +uri+ is nil; every element is nil when +uri+ does not
	    # match RFC3986_URI_SPLIT at all.
	    def non_validating_split(uri) #:nodoc:
	      match = RFC3986_URI_SPLIT.match(uri)
	      return Array.new(5) unless match

	      # Groups 2/4/5/7/9 are the components without their delimiters.
	      match.values_at(2, 4, 5, 7, 9)
	    end

	    # Percent-encodes every byte of +str+ (e.g. "[" becomes "%5B").
	    def percent_encode(str) # :nodoc:
	      str.each_byte.map { |byte| format('%%%02X', byte) }.join
	    end

	    # Parses +uri+ into a URI object of the class registered for its scheme,
	    # falling back to Generic.
	    #
	    # When +retry_parse+ is true and the first attempt raises
	    # URI::InvalidURIError, the query is escaped with naive_escape and the
	    # parse is attempted exactly once more; a second failure is re-raised.
	    def parse(uri, retry_parse = true) # :nodoc:
	      scheme, userinfo, host, port,
	        registry, path, opaque, query, fragment = split(uri)

	      klass = (scheme && URI.scheme_list[scheme.upcase]) || Generic
	      klass.new(scheme, userinfo, host, port,
	                registry, path, opaque, query,
	                fragment, self)
	    rescue URI::InvalidURIError
	      raise unless retry_parse

	      parse(naive_escape(uri), false)
	    end

	    private

	    # Will only attempt to escape the query string.
	    #
	    # Returns +uri+ unchanged when it has no query, so the retry in #parse
	    # raises InvalidURIError again instead of failing on a nil query.
	    # Delimiters are only re-emitted for components that were present, so a
	    # missing scheme, authority or fragment does not gain a stray ":", "//"
	    # or "#".
	    def naive_escape(uri) # :nodoc:
	      scheme, authority, path, query, fragment = non_validating_split(uri)
	      return uri if query.nil?

	      escaped_query = query.gsub(QUERY_RESERVED) { |char| percent_encode(char) }

	      parts = []
	      parts << "#{scheme}:" if scheme
	      parts << "//#{authority}" if authority
	      parts << path.to_s
	      parts << "?#{escaped_query}"
	      parts << "##{fragment}" if fragment
	      parts.join
	    end

	  end # class Parser
	end # module URI
	# This is irb showing the problem
	irb(main):001:0> Object::RUBY_VERSION
	=> "2.2.0"
	irb(main):002:0> URI.parse "http://user:[email protected]/go/to/widgets?a[b]=1&test=hello world&x=/#hello"
	URI::InvalidURIError: bad URI(is not URI?): http://user:[email protected]/go/to/widgets?a[b]=1&test=hello world&x=/#hello
	from /Users/artega/.rubies/ruby-trunk/lib/ruby/2.2.0/uri/rfc3986_parser.rb:47:in `split'
	from /Users/artega/.rubies/ruby-trunk/lib/ruby/2.2.0/uri/rfc3986_parser.rb:53:in `parse'
	from /Users/artega/.rubies/ruby-trunk/lib/ruby/2.2.0/uri/common.rb:223:in `parse'
	from (irb):2
	from /Users/artega/.rubies/ruby-trunk/bin/irb:11:in `<main>'
	irb(main):003:0> require 'fixed_rfc3986_parser' # my monkey patch to URI::RFC3986_Parser
	=> true
	irb(main):004:0> URI.parse "http://user:[email protected]/go/to/widgets?a[b]=1&test=hello world&x=/#hello"
	=> #<URI::HTTP:0x007fb9a93bb860 URL:http://user:[email protected]/go/to/widgets?a%5Bb%5D=1&test=hello%20world&x=%2F#hello>
	irb(main):005:0>