patrickkettner · November 1, 2016 09:07
diff --git a/OLD_RUBY_CGI_ESCAPE.gemspec b/OLD_RUBY_CGI_ESCAPE.gemspec
 # Gem specification for OLD_RUBY_CGI_ESCAPE: sets the gem's name, version,
 # build date, summary/description, authors, contact details, file list and
 # require path.
 Gem::Specification.new do |gem|
  gem.name    = 'OLD_RUBY_CGI_ESCAPE'
  gem.version = '1.0.0'
  # Build date as YYYY-MM-DD. Time is a core class, so this works without
  # `require 'date'`; the Date constant is not guaranteed to be loaded when
  # the gemspec is evaluated.
  gem.date    = Time.now.strftime('%Y-%m-%d')

  gem.summary = "pure ruby HTML escaping extracted from the CGI module from Ruby <= 2.4"
  gem.description = "extended description"

  gem.authors  = ['Nobuyoshi Nakada', 'Eric Hodel', 'Yui NARUSE', 'Kenta Murata', 'Zachary Scott', 'Patrick Kettner']
  # NOTE(review): this address looks like an e-mail-obfuscation placeholder
  # rather than a real contact address - confirm before publishing.
  gem.email    = '[email protected]'
  gem.homepage = 'https://gist.github.com/patrickkettner/48cb6c904f776d169d8d7dcaa59832ba'

  # ensure the gem is built out of versioned files
  gem.files    = `git ls-files`.split("\n")
  # The library file sits next to the gemspec, so the require path is the
  # gem's root directory.
  gem.require_paths = ["."]
 end
diff --git a/OLD_RUBY_CGI_ESCAPE.rb b/OLD_RUBY_CGI_ESCAPE.rb
 # Taken from Ruby's CGI module, version 2.4 previos - git hash 28dc410
 # Full license details for original source code available at Ruby's website
 # https://www.ruby-lang.org/en/about/license.txt
 # ------------------------------------------------------------------------------
 # frozen_string_literal: false
 # Namespace class. It declares the (initially empty) Util module and extends
 # itself with it, so every method later added to Util is callable directly
 # on the class, e.g. OLD_RUBY_CGI_ESCAPE::escape(str).
 class OLD_RUBY_CGI_ESCAPE
   module Util
   end

   extend Util
 end
 module OLD_RUBY_CGI_ESCAPE::Util
  @@accept_charset="UTF-8" unless defined?(@@accept_charset)
  # URL-encode a string.
  #
  # Every run of bytes other than space, ASCII letters, digits, "_", "." and
  # "-" is replaced by %XX sequences (uppercase hex, one per byte); spaces
  # then become "+". The result is tagged with the encoding of +string+.
  #
  #   url_encoded_string = OLD_RUBY_CGI_ESCAPE::escape("'Stop!' said Fred")
  #      # => "%27Stop%21%27+said+Fred"
  def escape(string)
    original_encoding = string.encoding
    # Work on a binary copy so multibyte characters are escaped byte by byte.
    percent_encoded = string.b.gsub(/([^ a-zA-Z0-9_.-]+)/) do |unsafe_run|
      unsafe_run.each_byte.map { |byte| format('%%%02X', byte) }.join
    end
    percent_encoded.tr(' ', '+').force_encoding(original_encoding)
  end

  # URL-decode a string with encoding(optional).
  #
  # "+" is turned into a space and every run of %XX sequences into the raw
  # bytes it denotes. The result is tagged with +encoding+; if the bytes are
  # not valid in that encoding, it is tagged with the encoding of +string+
  # instead.
  #
  #   string = OLD_RUBY_CGI_ESCAPE::unescape("%27Stop%21%27+said+Fred")
  #      # => "'Stop!' said Fred"
  def unescape(string,encoding=@@accept_charset)
    decoded = string.tr('+', ' ').b.gsub(/((?:%[0-9a-fA-F]{2})+)/) do |escaped_run|
      # Each pair of hex digits in the run is one byte of output.
      escaped_run.scan(/[0-9a-fA-F]{2}/).map(&:hex).pack('C*')
    end
    decoded.force_encoding(encoding)
    return decoded if decoded.valid_encoding?

    decoded.force_encoding(string.encoding)
  end

  # The set of special characters and their escaped values.
  # Frozen so the shared lookup table cannot be modified by accident.
  TABLE_FOR_ESCAPE_HTML__ = {
    "'" => '&#39;',
    '&' => '&amp;',
    '"' => '&quot;',
    '<' => '&lt;',
    '>' => '&gt;',
  }.freeze

  # Escape special characters in HTML, namely '&"<>
  #
  # Returns a new string in which each of those five characters is replaced
  # by its entry in TABLE_FOR_ESCAPE_HTML__; all other characters are kept.
  #
  #   OLD_RUBY_CGI_ESCAPE::escapeHTML('Usage: foo "bar" <baz>')
  #      # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
  def escapeHTML(string)
    string.gsub(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
  end

  # Unescape a string that has been HTML-escaped.
  #
  # Replaces the named entities &apos; &amp; &quot; &gt; &lt; and decimal
  # (&#NN;) or hexadecimal (&#xNN; / &#XNN;) character references by the
  # characters they denote. Anything else is left untouched.
  #
  # For ASCII-compatible strings a numeric reference is only decoded when the
  # string's encoding is UTF-8, or ISO-8859-1 and the value is below 256, or
  # the value is below 128; otherwise it stays a character reference. A
  # reference whose value is not a valid code point (for example
  # &#1114112;) is also left as a reference instead of raising RangeError.
  #
  #   OLD_RUBY_CGI_ESCAPE::unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
  #      # => "Usage: foo \"bar\" <baz>"
  def unescapeHTML(string)
    enc = string.encoding
    if [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].include?(enc)
      # These encodings are not ASCII-compatible, so the pattern and every
      # replacement must be transcoded to +enc+. This branch has to run before
      # any comparison against an ASCII literal such as '&', which would raise
      # Encoding::CompatibilityError for such strings.
      return string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#[xX][0-9A-Fa-f]+);'.encode(enc))) do
        entity = $& # remembered now: the `when` regexps below overwrite $~
        begin
          case $1.encode(Encoding::US_ASCII)
          when 'apos'                then "'".encode(enc)
          when 'amp'                 then '&'.encode(enc)
          when 'quot'                then '"'.encode(enc)
          when 'gt'                  then '>'.encode(enc)
          when 'lt'                  then '<'.encode(enc)
          when /\A#0*(\d+)\z/        then $1.to_i.chr(enc)
          when /\A#x([0-9a-f]+)\z/i  then $1.hex.chr(enc)
          end
        rescue RangeError
          # Not a valid code point in +enc+: keep the reference as written.
          entity
        end
      end
    end

    # Fast path: nothing to unescape, hand back the very same object.
    return string unless string.include?('&')

    asciicompat = Encoding.compatible?(string, "a")
    # Turns a numeric reference into a character when +enc+ can represent it,
    # and otherwise returns +fallback+ (the reference in textual form).
    decode_codepoint = lambda do |codepoint, fallback|
      representable = enc == Encoding::UTF_8 ||
                      (enc == Encoding::ISO_8859_1 && codepoint < 256) ||
                      (asciicompat && codepoint < 128)
      begin
        representable ? codepoint.chr(enc) : fallback
      rescue RangeError
        fallback
      end
    end

    string.gsub(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do
      match = $1.dup
      case match
      when 'apos'                then "'"
      when 'amp'                 then '&'
      when 'quot'                then '"'
      when 'gt'                  then '>'
      when 'lt'                  then '<'
      when /\A#0*(\d+)\z/        then decode_codepoint.call($1.to_i, "&##{$1};")
      when /\A#x([0-9a-f]+)\z/i  then decode_codepoint.call($1.hex, "&#x#{$1};")
      else
        "&#{match};"
      end
    end
  end

  # snake_case synonym for OLD_RUBY_CGI_ESCAPE::escapeHTML(str)
  alias escape_html escapeHTML

  # snake_case synonym for OLD_RUBY_CGI_ESCAPE::unescapeHTML(str)
  alias unescape_html unescapeHTML

  # Escape only the tags of certain HTML elements in +string+.
  #
  # Takes an element or elements or array of elements.  Each element
  # is specified by the name of the element, without angle brackets.
  # This matches both the start and the end tag of that element
  # (case-insensitively). The attribute list of the open tag will also be
  # escaped (for instance, the double-quotes surrounding attribute values).
  # With no element names, +string+ itself is returned.
  #
  # The names are interpolated into a regular expression as given.
  #
  #   print OLD_RUBY_CGI_ESCAPE::escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
  #     # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
  #
  #   print OLD_RUBY_CGI_ESCAPE::escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
  #     # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
  def escapeElement(string, *elements)
    # Accept both escapeElement(str, "A", "IMG") and escapeElement(str, ["A", "IMG"]).
    tag_names = elements[0].kind_of?(Array) ? elements[0] : elements
    return string if tag_names.empty?

    tag_pattern = /<\/?(?:#{tag_names.join("|")})(?!\w)(?:.|\n)*?>/i
    string.gsub(tag_pattern) { |tag| OLD_RUBY_CGI_ESCAPE::escapeHTML(tag) }
  end

  # Undo escaping such as that done by OLD_RUBY_CGI_ESCAPE::escapeElement()
  #
  # Only escaped tags (&lt;NAME ...&gt; and &lt;/NAME&gt;) of the given
  # elements are unescaped; names are matched case-insensitively and are
  # interpolated into a regular expression as given. Accepts the names either
  # as separate arguments or as one array. With no element names, +string+
  # itself is returned.
  #
  #   print OLD_RUBY_CGI_ESCAPE::unescapeElement(
  #           OLD_RUBY_CGI_ESCAPE::escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
  #     # "&lt;BR&gt;<A HREF="url"></A>"
  #
  #   print OLD_RUBY_CGI_ESCAPE::unescapeElement(
  #           OLD_RUBY_CGI_ESCAPE::escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
  #     # "&lt;BR&gt;<A HREF="url"></A>"
  def unescapeElement(string, *elements)
    tag_names = elements[0].kind_of?(Array) ? elements[0] : elements
    return string if tag_names.empty?

    escaped_tag_pattern = /&lt;\/?(?:#{tag_names.join("|")})(?!\w)(?:.|\n)*?&gt;/i
    string.gsub(escaped_tag_pattern) { |escaped_tag| unescapeHTML(escaped_tag) }
  end

  # snake_case synonym for OLD_RUBY_CGI_ESCAPE::escapeElement(str, *elements)
  alias escape_element escapeElement

  # snake_case synonym for OLD_RUBY_CGI_ESCAPE::unescapeElement(str, *elements)
  alias unescape_element unescapeElement

  # Abbreviated day-of-week names specified by RFC 822, indexed by Time#wday
  # (0 = Sunday).
  RFC822_DAYS = %w[ Sun Mon Tue Wed Thu Fri Sat ].freeze

  # Abbreviated month names specified by RFC 822, indexed by month - 1.
  RFC822_MONTHS = %w[ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec ].freeze

  # Format a +Time+ object as a String using the format specified by RFC 1123.
  #
  # +time+ is not modified: a copy is converted to UTC before formatting.
  #
  #   OLD_RUBY_CGI_ESCAPE::rfc1123_date(Time.now)
  #     # Sat, 01 Jan 2000 00:00:00 GMT
  def rfc1123_date(time)
    # dup rather than clone: clone would carry over a frozen state, and
    # gmtime modifies its receiver.
    utc = time.dup.gmtime
    format("%s, %.2d %s %.4d %.2d:%.2d:%.2d GMT",
           RFC822_DAYS[utc.wday], utc.day, RFC822_MONTHS[utc.month - 1], utc.year,
           utc.hour, utc.min, utc.sec)
  end

  # Prettify (indent) an HTML string.
  #
  # +string+ is the HTML string to indent.  +shift+ is the indentation
  # unit to use; it defaults to two spaces.
  #
  # Every tag is placed on its own line, and the lines from an opening tag up
  # to (not including) its closing tag are indented by one more +shift+ per
  # nesting level. A closing tag that has no opening tag before it is left
  # where it is.
  #
  #   print OLD_RUBY_CGI_ESCAPE::pretty("<HTML><BODY></BODY></HTML>")
  #     # <HTML>
  #     #   <BODY>
  #     #   </BODY>
  #     # </HTML>
  #
  #   print OLD_RUBY_CGI_ESCAPE::pretty("<HTML><BODY></BODY></HTML>", "\t")
  #     # <HTML>
  #     #         <BODY>
  #     #         </BODY>
  #     # </HTML>
  #
  def pretty(string, shift = "  ")
    # Break the markup so that each tag starts a line and ends a line.
    lines = string.gsub(/(?!\A)<.*?>/m, "\n\\0").gsub(/<.*?>(?!\n)/m, "\\0\n")
    end_pos = 0
    # Walk over the closing tags that sit at the start of a line. Handled tags
    # get a "__" marker in front of them, so they are not matched again.
    while (end_pos = lines.index(/^<\/(\w+)/, end_pos))
      element = Regexp.last_match(1)
      start_pos = lines.rindex(/^\s*<#{element}/i, end_pos)
      if start_pos.nil?
        # Unbalanced markup: no opening tag to pair with. Step past this
        # closing tag so the scan continues instead of slicing from nil.
        end_pos += 1
        next
      end
      # Indent everything between the opening tag and its closing tag by one
      # more level, and mark both ends as processed.
      lines[start_pos...end_pos] = "__" + lines[start_pos...end_pos].gsub(/\n(?!\z)/, "\n" + shift) + "__"
    end
    # Drop the "__" markers, keeping the indentation in front of them.
    lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1')
  end

  # Short synonym for escapeHTML
  alias h escapeHTML
 end
	Gem::Specification.new do |gem|
	gem.name = 'OLD_RUBY_CGI_ESCAPE'
	gem.version = '1.0.0'
	gem.date = Date.today.to_s

	gem.summary = "pure ruby HTML escaping extracted from the CGI module from Ruby <= 2.4"
	gem.description = "extended description"

	gem.authors = ['Nobuyoshi Nakada', 'Eric Hodel', 'Yui NARUSE', 'Kenta Murata', 'Zachary Scott', 'Patrick Kettner']
	gem.email = '[email protected]'
	gem.homepage = 'https://gist.github.com/patrickkettner/48cb6c904f776d169d8d7dcaa59832ba'

	# ensure the gem is built out of versioned files
	gem.files = `git ls-files`.split("\n")
	gem.require_paths = ["."]
	end
	# Taken from Ruby's CGI module, version 2.4 previos - git hash 28dc410
	# Full license details for original source code available at Ruby's website
	# https://www.ruby-lang.org/en/about/license.txt
	# ------------------------------------------------------------------------------
	# frozen_string_literal: false
	class OLD_RUBY_CGI_ESCAPE; module Util; end; extend Util; end
	module OLD_RUBY_CGI_ESCAPE::Util
	@@accept_charset="UTF-8" unless defined?(@@accept_charset)
	# URL-encode a string.
	# url_encoded_string = OLD_RUBY_CGI_ESCAPE::escape("'Stop!' said Fred")
	# # => "%27Stop%21%27+said+Fred"
	def escape(string)
	encoding = string.encoding
	string.b.gsub(/([^ a-zA-Z0-9_.-]+)/) do |m|
	'%' + m.unpack('H2' * m.bytesize).join('%').upcase
	end.tr(' ', '+').force_encoding(encoding)
	end

	# URL-decode a string with encoding(optional).
	# string = OLD_RUBY_CGI_ESCAPE::unescape("%27Stop%21%27+said+Fred")
	# # => "'Stop!' said Fred"
	def unescape(string,encoding=@@accept_charset)
	str=string.tr('+', ' ').b.gsub(/((?:%[0-9a-fA-F]{2})+)/) do |m|
	[m.delete('%')].pack('H*')
	end.force_encoding(encoding)
	str.valid_encoding? ? str : str.force_encoding(string.encoding)
	end

	# The set of special characters and their escaped values
	TABLE_FOR_ESCAPE_HTML__ = {
	"'" => '&#39;',
	'&' => '&amp;',
	'"' => '&quot;',
	'<' => '&lt;',
	'>' => '&gt;',
	}

	# Escape special characters in HTML, namely &\"<>
	# OLD_RUBY_CGI_ESCAPE::escapeHTML('Usage: foo "bar" <baz>')
	# # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
	def escapeHTML(string)
	string.gsub(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
	end

	# Unescape a string that has been HTML-escaped
	# OLD_RUBY_CGI_ESCAPE::unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
	# # => "Usage: foo \"bar\" <baz>"
	def unescapeHTML(string)
	return string unless string.include? '&'
	enc = string.encoding
	if enc != Encoding::UTF_8 && [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].include?(enc)
	return string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do
	case $1.encode(Encoding::US_ASCII)
	when 'apos' then "'".encode(enc)
	when 'amp' then '&'.encode(enc)
	when 'quot' then '"'.encode(enc)
	when 'gt' then '>'.encode(enc)
	when 'lt' then '<'.encode(enc)
	when /\A#0*(\d+)\z/ then $1.to_i.chr(enc)
	when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc)
	end
	end
	end
	asciicompat = Encoding.compatible?(string, "a")
	string.gsub(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do
	match = $1.dup
	case match
	when 'apos' then "'"
	when 'amp' then '&'
	when 'quot' then '"'
	when 'gt' then '>'
	when 'lt' then '<'
	when /\A#0*(\d+)\z/
	n = $1.to_i
	if enc == Encoding::UTF_8 or
	enc == Encoding::ISO_8859_1 && n < 256 or
	asciicompat && n < 128
	n.chr(enc)
	else
	"&##{$1};"
	end
	when /\A#x([0-9a-f]+)\z/i
	n = $1.hex
	if enc == Encoding::UTF_8 or
	enc == Encoding::ISO_8859_1 && n < 256 or
	asciicompat && n < 128
	n.chr(enc)
	else
	"&#x#{$1};"
	end
	else
	"&#{match};"
	end
	end
	end

	# Synonym for OLD_RUBY_CGI_ESCAPE::escapeHTML(str)
	alias escape_html escapeHTML

	# Synonym for OLD_RUBY_CGI_ESCAPE::unescapeHTML(str)
	alias unescape_html unescapeHTML

	# Escape only the tags of certain HTML elements in +string+.
	#
	# Takes an element or elements or array of elements. Each element
	# is specified by the name of the element, without angle brackets.
	# This matches both the start and the end tag of that element.
	# The attribute list of the open tag will also be escaped (for
	# instance, the double-quotes surrounding attribute values).
	#
	# print OLD_RUBY_CGI_ESCAPE::escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
	# # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
	#
	# print OLD_RUBY_CGI_ESCAPE::escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
	# # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
	def escapeElement(string, *elements)
	elements = elements[0] if elements[0].kind_of?(Array)
	unless elements.empty?
	string.gsub(/<\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?>/i) do
	OLD_RUBY_CGI_ESCAPE::escapeHTML($&)
	end
	else
	string
	end
	end

	# Undo escaping such as that done by OLD_RUBY_CGI_ESCAPE::escapeElement()
	#
	# print OLD_RUBY_CGI_ESCAPE::unescapeElement(
	# OLD_RUBY_CGI_ESCAPE::escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
	# # "&lt;BR&gt;<A HREF="url"></A>"
	#
	# print OLD_RUBY_CGI_ESCAPE::unescapeElement(
	# OLD_RUBY_CGI_ESCAPE::escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
	# # "&lt;BR&gt;<A HREF="url"></A>"
	def unescapeElement(string, *elements)
	elements = elements[0] if elements[0].kind_of?(Array)
	unless elements.empty?
	string.gsub(/&lt;\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?&gt;/i) do
	unescapeHTML($&)
	end
	else
	string
	end
	end

	# Synonym for OLD_RUBY_CGI_ESCAPE::escapeElement(str)
	alias escape_element escapeElement

	# Synonym for OLD_RUBY_CGI_ESCAPE::unescapeElement(str)
	alias unescape_element unescapeElement

	# Abbreviated day-of-week names specified by RFC 822
	RFC822_DAYS = %w[ Sun Mon Tue Wed Thu Fri Sat ]

	# Abbreviated month names specified by RFC 822
	RFC822_MONTHS = %w[ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec ]

	# Format a +Time+ object as a String using the format specified by RFC 1123.
	#
	# OLD_RUBY_CGI_ESCAPE::rfc1123_date(Time.now)
	# # Sat, 01 Jan 2000 00:00:00 GMT
	def rfc1123_date(time)
	t = time.clone.gmtime
	return format("%s, %.2d %s %.4d %.2d:%.2d:%.2d GMT",
	RFC822_DAYS[t.wday], t.day, RFC822_MONTHS[t.month-1], t.year,
	t.hour, t.min, t.sec)
	end

	# Prettify (indent) an HTML string.
	#
	# +string+ is the HTML string to indent. +shift+ is the indentation
	# unit to use; it defaults to two spaces.
	#
	# print OLD_RUBY_CGI_ESCAPE::pretty("<HTML><BODY></BODY></HTML>")
	# # <HTML>
	# # <BODY>
	# # </BODY>
	# # </HTML>
	#
	# print OLD_RUBY_CGI_ESCAPE::pretty("<HTML><BODY></BODY></HTML>", "\t")
	# # <HTML>
	# # <BODY>
	# # </BODY>
	# # </HTML>
	#
	def pretty(string, shift = "  ")
	lines = string.gsub(/(?!\A)<.*?>/m, "\n\\0").gsub(/<.*?>(?!\n)/m, "\\0\n")
	end_pos = 0
	while end_pos = lines.index(/^<\/(\w+)/, end_pos)
	element = $1.dup
	start_pos = lines.rindex(/^\s*<#{element}/i, end_pos)
	lines[start_pos ... end_pos] = "__" + lines[start_pos ... end_pos].gsub(/\n(?!\z)/, "\n" + shift) + "__"
	end
	lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1')
	end

	alias h escapeHTML
	end