Last active
November 1, 2016 09:07
-
-
Save patrickkettner/48cb6c904f776d169d8d7dcaa59832ba to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Date is used below for gem.date; it is not guaranteed to be loaded when a
# gemspec is evaluated, so require it explicitly.
require 'date'

# Gem specification for the pure-Ruby HTML/URL escaping helpers extracted
# from Ruby's CGI module.
Gem::Specification.new do |gem|
  gem.name = 'OLD_RUBY_CGI_ESCAPE'
  gem.version = '1.0.0'
  gem.date = Date.today.to_s
  gem.summary = "pure ruby HTML escaping extracted from the CGI module from Ruby <= 2.4"
  # NOTE(review): placeholder text - replace with a real description before publishing.
  gem.description = "extended description"
  gem.authors = ['Nobuyoshi Nakada', 'Eric Hodel', 'Yui NARUSE', 'Kenta Murata', 'Zachary Scott', 'Patrick Kettner']
  # NOTE(review): this value looks like an obfuscated placeholder rather than
  # a real address - confirm against the original gemspec.
  gem.email = '[email protected]'
  gem.homepage = 'https://gist.github.com/patrickkettner/48cb6c904f776d169d8d7dcaa59832ba'
  # ensure the gem is built out of versioned files
  gem.files = `git ls-files`.split("\n")
  # The library file sits at the repository root rather than under lib/.
  gem.require_paths = ["."]
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Taken from Ruby's CGI module as it was prior to version 2.4 - git hash 28dc410
# Full license details for original source code available at Ruby's website | |
# https://www.ruby-lang.org/en/about/license.txt | |
# ------------------------------------------------------------------------------ | |
# frozen_string_literal: false | |
# Namespace class; it extends Util so every helper below is callable as
# OLD_RUBY_CGI_ESCAPE.<method> (or OLD_RUBY_CGI_ESCAPE::<method>).
class OLD_RUBY_CGI_ESCAPE; module Util; end; extend Util; end

# Pure-Ruby URL and HTML escaping helpers, plus an RFC 1123 date formatter
# and an HTML indenter.
module OLD_RUBY_CGI_ESCAPE::Util
  # Default encoding applied to the result of #unescape.
  @@accept_charset = "UTF-8" unless defined?(@@accept_charset)

  # URL-encode a string.
  #
  # Every run of bytes outside <tt>[ a-zA-Z0-9_.-]</tt> is replaced by
  # upper-case <tt>%XX</tt> sequences and spaces become <tt>+</tt>. The
  # result carries the encoding of +string+.
  #
  #   url_encoded_string = OLD_RUBY_CGI_ESCAPE::escape("'Stop!' said Fred")
  #      # => "%27Stop%21%27+said+Fred"
  def escape(string)
    encoding = string.encoding
    # Work on a binary copy so multi-byte characters are escaped byte by byte.
    string.b.gsub(/([^ a-zA-Z0-9_.-]+)/) do |m|
      '%' + m.unpack('H2' * m.bytesize).join('%').upcase
    end.tr(' ', '+').force_encoding(encoding)
  end

  # URL-decode a string with encoding (optional).
  #
  # <tt>+</tt> becomes a space and each <tt>%XX</tt> sequence becomes the
  # corresponding byte. The result is tagged with +encoding+; if the decoded
  # bytes are not valid in that encoding, the encoding of +string+ is used
  # instead.
  #
  #   string = OLD_RUBY_CGI_ESCAPE::unescape("%27Stop%21%27+said+Fred")
  #      # => "'Stop!' said Fred"
  def unescape(string, encoding = @@accept_charset)
    str = string.tr('+', ' ').b.gsub(/((?:%[0-9a-fA-F]{2})+)/) do |m|
      [m.delete('%')].pack('H*')
    end.force_encoding(encoding)
    str.valid_encoding? ? str : str.force_encoding(string.encoding)
  end

  # The set of special characters and their escaped values
  TABLE_FOR_ESCAPE_HTML__ = {
    "'" => '&#39;',
    '&' => '&amp;',
    '"' => '&quot;',
    '<' => '&lt;',
    '>' => '&gt;',
  }.freeze

  # Escape special characters in HTML, namely '&\"<>
  #
  #   OLD_RUBY_CGI_ESCAPE::escapeHTML('Usage: foo "bar" <baz>')
  #      # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
  def escapeHTML(string)
    string.gsub(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
  end

  # Unescape a string that has been HTML-escaped.
  #
  # Handles the named entities apos, amp, quot, gt and lt as well as decimal
  # (<tt>&#NN;</tt>) and hexadecimal (<tt>&#xNN;</tt>) character references.
  # A numeric reference that cannot be represented in the encoding of
  # +string+ is left in the output instead of raising.
  #
  #   OLD_RUBY_CGI_ESCAPE::unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
  #      # => "Usage: foo \"bar\" <baz>"
  def unescapeHTML(string)
    enc = string.encoding

    # Integer#chr raises RangeError for code points that are invalid in +enc+
    # (surrogates, values above U+10FFFF, ...); report those as nil.
    to_char = lambda do |codepoint|
      begin
        codepoint.chr(enc)
      rescue RangeError
        nil
      end
    end

    # UTF-16/32 strings are not ASCII-compatible, so the pattern and the
    # replacements have to be transcoded. This check runs before any
    # comparison against an ASCII literal, which would raise for them.
    if [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].include?(enc)
      pattern = Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))
      return string.gsub(pattern) do
        entity = $& # keep the source text; the regexps in the case below reset $&
        case $1.encode(Encoding::US_ASCII)
        when 'apos' then "'".encode(enc)
        when 'amp' then '&'.encode(enc)
        when 'quot' then '"'.encode(enc)
        when 'gt' then '>'.encode(enc)
        when 'lt' then '<'.encode(enc)
        when /\A#0*(\d+)\z/ then to_char.call($1.to_i) || entity
        when /\A#x([0-9a-f]+)\z/i then to_char.call($1.hex) || entity
        end
      end
    end

    return string unless string.include?('&')

    asciicompat = Encoding.compatible?(string, "a")
    # A numeric reference is decoded only when the target encoding can hold it:
    # anything for UTF-8, below 256 for ISO-8859-1, below 128 otherwise.
    decodable = lambda do |n|
      enc == Encoding::UTF_8 ||
        (enc == Encoding::ISO_8859_1 && n < 256) ||
        (asciicompat && n < 128)
    end

    string.gsub(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do
      match = $1.dup
      case match
      when 'apos' then "'"
      when 'amp' then '&'
      when 'quot' then '"'
      when 'gt' then '>'
      when 'lt' then '<'
      when /\A#0*(\d+)\z/
        digits = $1
        n = digits.to_i
        (decodable.call(n) && to_char.call(n)) || "&##{digits};"
      when /\A#x([0-9a-f]+)\z/i
        hex_digits = $1
        n = hex_digits.hex
        (decodable.call(n) && to_char.call(n)) || "&#x#{hex_digits};"
      else
        "&#{match};"
      end
    end
  end

  # Synonym for OLD_RUBY_CGI_ESCAPE::escapeHTML(str)
  alias escape_html escapeHTML

  # Synonym for OLD_RUBY_CGI_ESCAPE::unescapeHTML(str)
  alias unescape_html unescapeHTML

  # Escape only the tags of certain HTML elements in +string+.
  #
  # Takes an element or elements or array of elements. Each element
  # is specified by the name of the element, without angle brackets.
  # This matches both the start and the end tag of that element.
  # The attribute list of the open tag will also be escaped (for
  # instance, the double-quotes surrounding attribute values).
  #
  # Element names are interpolated into a regular expression as-is.
  #
  #   print OLD_RUBY_CGI_ESCAPE::escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
  #     # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
  #
  #   print OLD_RUBY_CGI_ESCAPE::escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
  #     # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
  def escapeElement(string, *elements)
    elements = elements[0] if elements[0].kind_of?(Array)
    return string if elements.empty?

    string.gsub(/<\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?>/i) do
      escapeHTML($&)
    end
  end

  # Undo escaping such as that done by OLD_RUBY_CGI_ESCAPE::escapeElement()
  #
  # Only escaped tags (<tt>&lt;NAME ...&gt;</tt>) of the given elements are
  # unescaped; everything else is left as it is.
  #
  #   print OLD_RUBY_CGI_ESCAPE::unescapeElement(
  #           OLD_RUBY_CGI_ESCAPE::escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
  #     # "&lt;BR&gt;<A HREF="url"></A>"
  #
  #   print OLD_RUBY_CGI_ESCAPE::unescapeElement(
  #           OLD_RUBY_CGI_ESCAPE::escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
  #     # "&lt;BR&gt;<A HREF="url"></A>"
  def unescapeElement(string, *elements)
    elements = elements[0] if elements[0].kind_of?(Array)
    return string if elements.empty?

    string.gsub(/&lt;\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?&gt;/i) do
      unescapeHTML($&)
    end
  end

  # Synonym for OLD_RUBY_CGI_ESCAPE::escapeElement(str)
  alias escape_element escapeElement

  # Synonym for OLD_RUBY_CGI_ESCAPE::unescapeElement(str)
  alias unescape_element unescapeElement

  # Abbreviated day-of-week names specified by RFC 822
  RFC822_DAYS = %w[ Sun Mon Tue Wed Thu Fri Sat ].freeze

  # Abbreviated month names specified by RFC 822
  RFC822_MONTHS = %w[ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec ].freeze

  # Format a +Time+ object as a String using the format specified by RFC 1123.
  #
  # The time is converted to GMT on a copy, so +time+ itself is not modified.
  #
  #   OLD_RUBY_CGI_ESCAPE::rfc1123_date(Time.now)
  #     # Sat, 01 Jan 2000 00:00:00 GMT
  def rfc1123_date(time)
    t = time.clone.gmtime
    format("%s, %.2d %s %.4d %.2d:%.2d:%.2d GMT",
           RFC822_DAYS[t.wday], t.day, RFC822_MONTHS[t.month - 1], t.year,
           t.hour, t.min, t.sec)
  end

  # Prettify (indent) an HTML string.
  #
  # +string+ is the HTML string to indent. +shift+ is the indentation
  # unit to use; it defaults to two spaces.
  #
  #   print OLD_RUBY_CGI_ESCAPE::pretty("<HTML><BODY></BODY></HTML>")
  #     # <HTML>
  #     #   <BODY>
  #     #   </BODY>
  #     # </HTML>
  #
  #   print OLD_RUBY_CGI_ESCAPE::pretty("<HTML><BODY></BODY></HTML>", "\t")
  #     # <HTML>
  #     #         <BODY>
  #     #         </BODY>
  #     # </HTML>
  #
  def pretty(string, shift = "  ")
    # Put every tag on a line of its own.
    lines = string.gsub(/(?!\A)<.*?>/m, "\n\\0").gsub(/<.*?>(?!\n)/m, "\\0\n")
    end_pos = 0
    # For each closing tag at the start of a line, indent everything between
    # it and its opening tag. The "__" markers stop already-processed tags
    # from being matched again; they are stripped at the end.
    while end_pos = lines.index(/^<\/(\w+)/, end_pos)
      element = $1.dup
      start_pos = lines.rindex(/^\s*<#{element}/i, end_pos)
      lines[start_pos ... end_pos] = "__" + lines[start_pos ... end_pos].gsub(/\n(?!\z)/, "\n" + shift) + "__"
    end
    lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1')
  end

  # Short synonym for OLD_RUBY_CGI_ESCAPE::escapeHTML(str)
  alias h escapeHTML
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment